From 7a0f5cc849c4e46fbed849dccce10fb2c7b41eaa Mon Sep 17 00:00:00 2001 From: zhiwuyazhe_fjr Date: Fri, 27 Mar 2026 12:39:26 +0800 Subject: [PATCH] feat(zotero): add CLI-Anything harness for Zotero desktop --- .gitignore | 4 + README.md | 8 + README_CN.md | 8 + registry.json | 16 +- zotero/agent-harness/ZOTERO.md | 460 ++++++++ .../cli_anything/zotero/README.md | 498 +++++++++ .../cli_anything/zotero/__init__.py | 5 + .../cli_anything/zotero/__main__.py | 5 + .../cli_anything/zotero/core/__init__.py | 1 + .../cli_anything/zotero/core/analysis.py | 166 +++ .../cli_anything/zotero/core/catalog.py | 252 +++++ .../cli_anything/zotero/core/discovery.py | 87 ++ .../cli_anything/zotero/core/experimental.py | 175 ++++ .../cli_anything/zotero/core/imports.py | 664 ++++++++++++ .../cli_anything/zotero/core/notes.py | 170 +++ .../cli_anything/zotero/core/rendering.py | 98 ++ .../cli_anything/zotero/core/session.py | 111 ++ .../cli_anything/zotero/skills/SKILL.md | 243 +++++ .../cli_anything/zotero/tests/TEST.md | 307 ++++++ .../cli_anything/zotero/tests/__init__.py | 1 + .../cli_anything/zotero/tests/_helpers.py | 705 +++++++++++++ .../zotero/tests/test_agent_harness.py | 55 + .../zotero/tests/test_cli_entrypoint.py | 410 ++++++++ .../cli_anything/zotero/tests/test_core.py | 683 ++++++++++++ .../zotero/tests/test_full_e2e.py | 351 +++++++ .../cli_anything/zotero/utils/__init__.py | 1 + .../cli_anything/zotero/utils/openai_api.py | 70 ++ .../cli_anything/zotero/utils/repl_skin.py | 521 ++++++++++ .../cli_anything/zotero/utils/zotero_http.py | 230 ++++ .../cli_anything/zotero/utils/zotero_paths.py | 298 ++++++ .../zotero/utils/zotero_sqlite.py | 743 +++++++++++++ .../cli_anything/zotero/zotero_cli.py | 984 ++++++++++++++++++ zotero/agent-harness/pyproject.toml | 3 + zotero/agent-harness/setup.py | 67 ++ zotero/agent-harness/skill_generator.py | 239 +++++ .../agent-harness/templates/SKILL.md.template | 53 + 36 files changed, 8691 insertions(+), 1 deletion(-) create mode 100644 
zotero/agent-harness/ZOTERO.md create mode 100644 zotero/agent-harness/cli_anything/zotero/README.md create mode 100644 zotero/agent-harness/cli_anything/zotero/__init__.py create mode 100644 zotero/agent-harness/cli_anything/zotero/__main__.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/__init__.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/analysis.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/catalog.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/discovery.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/experimental.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/imports.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/notes.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/rendering.py create mode 100644 zotero/agent-harness/cli_anything/zotero/core/session.py create mode 100644 zotero/agent-harness/cli_anything/zotero/skills/SKILL.md create mode 100644 zotero/agent-harness/cli_anything/zotero/tests/TEST.md create mode 100644 zotero/agent-harness/cli_anything/zotero/tests/__init__.py create mode 100644 zotero/agent-harness/cli_anything/zotero/tests/_helpers.py create mode 100644 zotero/agent-harness/cli_anything/zotero/tests/test_agent_harness.py create mode 100644 zotero/agent-harness/cli_anything/zotero/tests/test_cli_entrypoint.py create mode 100644 zotero/agent-harness/cli_anything/zotero/tests/test_core.py create mode 100644 zotero/agent-harness/cli_anything/zotero/tests/test_full_e2e.py create mode 100644 zotero/agent-harness/cli_anything/zotero/utils/__init__.py create mode 100644 zotero/agent-harness/cli_anything/zotero/utils/openai_api.py create mode 100644 zotero/agent-harness/cli_anything/zotero/utils/repl_skin.py create mode 100644 zotero/agent-harness/cli_anything/zotero/utils/zotero_http.py create mode 100644 zotero/agent-harness/cli_anything/zotero/utils/zotero_paths.py create mode 
100644 zotero/agent-harness/cli_anything/zotero/utils/zotero_sqlite.py create mode 100644 zotero/agent-harness/cli_anything/zotero/zotero_cli.py create mode 100644 zotero/agent-harness/pyproject.toml create mode 100644 zotero/agent-harness/setup.py create mode 100644 zotero/agent-harness/skill_generator.py create mode 100644 zotero/agent-harness/templates/SKILL.md.template diff --git a/.gitignore b/.gitignore index 8afe11ee9..c1bebbdf1 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ !/inkscape/ !/audacity/ !/libreoffice/ +!/zotero/ !/mubu/ !/obs-studio/ !/kdenlive/ @@ -68,6 +69,8 @@ /audacity/.* /libreoffice/* /libreoffice/.* +/zotero/* +/zotero/.* /mubu/* /mubu/.* /obs-studio/* @@ -107,6 +110,7 @@ !/inkscape/agent-harness/ !/audacity/agent-harness/ !/libreoffice/agent-harness/ +!/zotero/agent-harness/ !/mubu/agent-harness/ !/obs-studio/agent-harness/ !/kdenlive/agent-harness/ diff --git a/README.md b/README.md index 1dd2f6855..79ee44f52 100644 --- a/README.md +++ b/README.md @@ -633,6 +633,13 @@ Each application received complete, production-ready CLI interfaces — not demo ✅ 158 +📚 Zotero +Reference Management +cli-anything-zotero +Local SQLite + connector + Local API +✅ New + + 📝 Mubu Knowledge Management & Outlining cli-anything-mubu @@ -827,6 +834,7 @@ cli-anything/ ├── 🎵 audacity/agent-harness/ # Audacity CLI (161 tests) ├── 🌐 browser/agent-harness/ # Browser CLI (DOMShell MCP, new) ├── 📄 libreoffice/agent-harness/ # LibreOffice CLI (158 tests) +├── 📚 zotero/agent-harness/ # Zotero CLI (new, write import support) ├── 📝 mubu/agent-harness/ # Mubu CLI (96 tests) ├── 📹 obs-studio/agent-harness/ # OBS Studio CLI (153 tests) ├── 🎞️ kdenlive/agent-harness/ # Kdenlive CLI (155 tests) diff --git a/README_CN.md b/README_CN.md index 1f1c2e77c..36b61a8a4 100644 --- a/README_CN.md +++ b/README_CN.md @@ -525,6 +525,13 @@ CLI-Anything 适用于任何有代码库的软件 —— 
ไธ้™้ข†ๅŸŸ๏ผŒไธ้™ โœ… 158 +๐Ÿ“š Zotero +ๆ–‡็Œฎ็ฎก็†ไธŽๅผ•็”จ +cli-anything-zotero +Local SQLite + connector + Local API +โœ… ๆ–ฐๅขž + + ๐Ÿ“น OBS Studio ็›ดๆ’ญไธŽๅฝ•ๅˆถ cli-anything-obs-studio @@ -668,6 +675,7 @@ cli-anything/ โ”œโ”€โ”€ โœ๏ธ inkscape/agent-harness/ # Inkscape CLI๏ผˆ202 ้กนๆต‹่ฏ•๏ผ‰ โ”œโ”€โ”€ ๐ŸŽต audacity/agent-harness/ # Audacity CLI๏ผˆ161 ้กนๆต‹่ฏ•๏ผ‰ โ”œโ”€โ”€ ๐Ÿ“„ libreoffice/agent-harness/ # LibreOffice CLI๏ผˆ158 ้กนๆต‹่ฏ•๏ผ‰ +โ”œโ”€โ”€ ๐Ÿ“š zotero/agent-harness/ # Zotero CLI๏ผˆๆ–ฐๅขž๏ผŒๆ”ฏๆŒๆ–‡็Œฎๅฏผๅ…ฅ๏ผ‰ โ”œโ”€โ”€ ๐Ÿ“น obs-studio/agent-harness/ # OBS Studio CLI๏ผˆ153 ้กนๆต‹่ฏ•๏ผ‰ โ”œโ”€โ”€ ๐ŸŽž๏ธ kdenlive/agent-harness/ # Kdenlive CLI๏ผˆ155 ้กนๆต‹่ฏ•๏ผ‰ โ”œโ”€โ”€ ๐ŸŽฌ shotcut/agent-harness/ # Shotcut CLI๏ผˆ154 ้กนๆต‹่ฏ•๏ผ‰ diff --git a/registry.json b/registry.json index 8b49c4008..9c258942c 100644 --- a/registry.json +++ b/registry.json @@ -2,7 +2,7 @@ "meta": { "repo": "https://github.com/HKUDS/CLI-Anything", "description": "CLI-Hub โ€” Agent-native stateful CLI interfaces for softwares, codebases, and Web Services", - "updated": "2026-03-18" + "updated": "2026-03-26" }, "clis": [ { @@ -173,6 +173,20 @@ "contributor": "CLI-Anything-Team", "contributor_url": "https://github.com/HKUDS/CLI-Anything" }, + { + "name": "zotero", + "display_name": "Zotero", + "version": "0.1.0", + "description": "Reference management via local Zotero SQLite, connector, and Local API", + "requires": "Zotero desktop app", + "homepage": "https://www.zotero.org", + "install_cmd": "pip install git+https://github.com/HKUDS/CLI-Anything.git#subdirectory=zotero/agent-harness", + "entry_point": "cli-anything-zotero", + "skill_md": "zotero/agent-harness/cli_anything/zotero/skills/SKILL.md", + "category": "office", + "contributor": "zhiwuyazhe_fjr", + "contributor_url": "https://github.com/zhiwuyazhe_fjr" + }, { "name": "mubu", "display_name": "Mubu", diff --git a/zotero/agent-harness/ZOTERO.md b/zotero/agent-harness/ZOTERO.md new file mode 100644 index 
000000000..045d38000 --- /dev/null +++ b/zotero/agent-harness/ZOTERO.md @@ -0,0 +1,460 @@ +# Zotero: Project-Specific Analysis and Operator Guide + +## Current Capability Snapshot + +### Stable and Supported + +- import literature into a specific collection through official connector flows +- attach local or downloaded PDFs during the same import session +- inspect libraries, collections, items, attachments, tags, styles, and saved searches +- find items by keyword or exact title +- read child notes under an item +- add a child note to an existing item through official connector save flows +- export RIS, BibTeX, BibLaTeX, CSL JSON, CSV, MODS, and Refer +- render citations and bibliography entries through Zotero's own CSL engine +- route stable read/search/export flows across both user and group libraries +- build LLM-ready structured context for one item +- optionally call OpenAI directly for analysis + +### Experimental Local Enhancements + +- create a collection by writing directly to `zotero.sqlite` +- add an existing top-level item to another collection +- move an existing top-level item between collections + +These experimental commands are intentionally not presented as official Zotero +API capabilities. They exist as local power-user tooling with explicit safety +guards. + +### Still Out of Scope + +- snapshot capture +- arbitrary existing-item attachment upload outside the current import session +- word-processor transaction integration +- privileged JavaScript execution inside Zotero +- standalone note creation +- group-library write support for experimental SQLite operations + +## Architecture Summary + +This harness treats Zotero as a layered desktop system: + +1. SQLite for local inventory and offline reads +2. connector endpoints for GUI-aware state and official write flows +3. Local API endpoints for live search, CSL rendering, and translator-backed export +4. 
experimental CLI-only SQLite writes for a few local library-management tasks + +The default rule is conservative: + +- use official Zotero surfaces whenever they exist +- do not reimplement translators or citeproc +- isolate non-official writes behind explicit `--experimental` + +## Source Anchors + +The implementation is derived from the installed Zotero source under: + +```text +C:\Program Files\Zotero +``` + +Primary anchors: + +- `app/omni.ja` + - `chrome/content/zotero/xpcom/server/server_localAPI.js` + - `chrome/content/zotero/xpcom/server/server_connector.js` + - `chrome/content/zotero/xpcom/server/server_connectorIntegration.js` + - `chrome/content/zotero/xpcom/server/saveSession.js` + - `chrome/content/zotero/modules/commandLineOptions.mjs` +- `defaults/preferences/zotero.js` + +Important constants from Zotero 7.0.32: + +- default HTTP port: `23119` +- Local API pref default: `extensions.zotero.httpServer.localAPI.enabled = false` +- connector liveness endpoint: `/connector/ping` +- selected collection endpoint: `/connector/getSelectedCollection` +- official connector write endpoints used here: + - `/connector/import` + - `/connector/saveItems` + - `/connector/saveAttachment` + - `/connector/updateSession` + +## Backend Responsibilities + +### SQLite + +Used for: + +- libraries +- collection listing, lookup, and tree building +- top-level item inventory +- child notes, attachments, and annotations +- tag lookup +- saved-search metadata +- style inventory +- experimental local collection writes + +Behavior notes: + +- regular inspection uses `mode=ro&immutable=1` +- no write path is shared with normal stable commands +- experimental writes open a separate transaction-only writable connection + +### Connector + +Used for: + +- liveness +- selected collection detection +- file import +- JSON item import +- import-time attachment upload through the same connector save session +- child note creation +- session retargeting and post-save tagging + +Behavior 
notes: + +- Zotero must be running +- write behavior depends on the live desktop app state +- import-time PDF attachment upload is limited to items created in the same connector session +- `note add` inherits connector constraints and therefore expects the GUI to be on the same library as the parent item + +### Local API + +Used for: + +- keyword item search +- citation rendering +- bibliography rendering +- export +- saved-search execution + +Behavior notes: + +- Zotero must be running +- Local API must be enabled in `user.js` or `prefs.js` +- stable read/search/export commands automatically switch between user and group Local API routes +- there is no fake local fallback for citeproc or translator export + +### OpenAI + +Used for: + +- optional `item analyze` + +Behavior notes: + +- requires `OPENAI_API_KEY` +- requires explicit `--model` +- recommended stable interface remains `item context` + +## How To Enable Local API + +### Recommended CLI Path + +```bash +cli-anything-zotero --json app enable-local-api +cli-anything-zotero --json app enable-local-api --launch +``` + +What this does: + +- resolves the active Zotero profile +- writes `extensions.zotero.httpServer.localAPI.enabled=true` into `user.js` +- reports whether the pref was already enabled +- optionally launches Zotero and verifies connector and Local API readiness + +### Manual Path + +Add this line to the active profile's `user.js`: + +```js +user_pref("extensions.zotero.httpServer.localAPI.enabled", true); +``` + +Then restart Zotero. 
+ +### Verification + +Use either: + +```bash +cli-anything-zotero --json app status +cli-anything-zotero --json app ping +``` + +`app status` should show: + +- `local_api_enabled_configured: true` +- `local_api_available: true` once Zotero is running + +## Workflow Map + +### Import Into a Specific Collection + +Use: + +- `import file <path> --collection <ref>` +- `import json <path> --collection <ref>` +- `import file <path> --attachments-manifest <manifest>` +- `import json <path>` with inline per-item `attachments` + +Backend: + +- connector + +Officiality: + +- official Zotero write flow +- attachment phase uses official `/connector/saveAttachment` in the same session + +### Find One Paper + +Use: + +- `item find <query>` +- `item find --exact-title` +- `item get <ref>` + +Backend: + +- Local API for live keyword search +- SQLite for exact title or offline fallback + +### Read One Collection + +Use: + +- `collection find <query>` +- `collection get <ref>` +- `collection items <ref>` + +Backend: + +- SQLite + +### Read Notes for a Paper + +Use: + +- `item notes <ref>` +- `note get <ref>` + +Backend: + +- SQLite + +### Add a Note to a Paper + +Use: + +- `note add --text ...` +- `note add --file ... --format markdown` + +Backend: + +- connector `/connector/saveItems` + +### Export or Analyze a Paper + +Use: + +- `item export` +- `item citation` +- `item bibliography` +- `item context` +- `item analyze` + +Backends: + +- Local API for export/citation/bibliography +- SQLite plus optional Local API enrichment for `item context` +- OpenAI for `item analyze` + +### Re-file Existing Items + +Use: + +- `collection create ... --experimental` +- `item add-to-collection ... --experimental` +- `item move-to-collection ... 
--experimental` + +Backend: + +- experimental direct SQLite writes + +## Command Reference + +| Command | Purpose | Requires Zotero Running | Backend | Notes | +|---|---|---:|---|---| +| `app status` | Show runtime paths and backend availability | No | discovery | Includes profile, data dir, SQLite path, connector, Local API | +| `app version` | Show harness and Zotero version | No | discovery | Uses install metadata | +| `app launch` | Launch Zotero and wait for liveness | No | executable + connector | Waits for Local API too when configured | +| `app enable-local-api` | Enable Local API in `user.js` | No | prefs write | Safe, idempotent helper | +| `app ping` | Check connector liveness | Yes | connector | Only connector, not Local API | +| `collection list` | List collections | No | SQLite | Uses current library context | +| `collection find <query>` | Find collections by name | No | SQLite | Good for recovering keys/IDs | +| `collection tree` | Show nested collection structure | No | SQLite | Parent-child hierarchy | +| `collection get <ref>` | Read one collection | No | SQLite | Accepts ID or key | +| `collection items <ref>` | Read items in one collection | No | SQLite | Top-level items only | +| `collection use-selected` | Save GUI-selected collection into session | Yes | connector | Uses `/connector/getSelectedCollection` | +| `collection create <name> --experimental` | Create a collection locally | No, Zotero must be closed | experimental SQLite | Automatic backup + transaction | +| `item list` | List top-level items | No | SQLite | Children are excluded | +| `item find <query>` | Find papers by keyword | Recommended | Local API + SQLite | Falls back to SQLite title search | +| `item find --exact-title` | Exact title lookup | No | SQLite | Stable offline path | +| `item get <ref>` | Read one item | No | SQLite | Returns fields, creators, tags | +| `item children <ref>` | Read all child records | No | SQLite | Includes notes, attachments, annotations | +| `item notes <ref>` | Read child 
notes only | No | SQLite | Purpose-built note listing | +| `item attachments <ref>` | Read attachment children | No | SQLite | Resolves `storage:` paths | +| `item file <ref>` | Resolve one attachment file | No | SQLite | Returns first child attachment for regular items | +| `item export <ref> --format <fmt>` | Translator-backed export | Yes | Local API | Zotero handles the export | +| `item citation <ref>` | CSL citation render | Yes | Local API | Supports style, locale, linkwrap | +| `item bibliography <ref>` | CSL bibliography render | Yes | Local API | Supports style, locale, linkwrap | +| `item context <ref>` | Build structured LLM-ready context | Optional | SQLite + optional Local API | Recommended stable AI interface | +| `item analyze <ref>` | Send context to OpenAI | API key required | OpenAI + local context | Model must be explicit | +| `item add-to-collection ... --experimental` | Append collection membership | No, Zotero must be closed | experimental SQLite | Does not remove existing memberships | +| `item move-to-collection ... 
--experimental` | Move item between collections | No, Zotero must be closed | experimental SQLite | Requires explicit sources or `--all-other-collections` | +| `note get <ref>` | Read one note | No | SQLite | Accepts note item ID or key | +| `note add <item-ref>` | Add a child note | Yes | connector | Parent item must be top-level | +| `search list` | List saved searches | No | SQLite | Metadata only | +| `search get <ref>` | Read one saved search definition | No | SQLite | Includes stored conditions | +| `search items <ref>` | Execute a saved search | Yes | Local API | Live command | +| `tag list` | List tags | No | SQLite | Includes item counts | +| `tag items <tag>` | Read items under one tag | No | SQLite | Tag string or tag ID | +| `style list` | Read installed CSL styles | No | local data dir | Parses local `.csl` files | +| `import file <path>` | Import through Zotero translators | Yes | connector | Supports optional `--attachments-manifest` sidecar | +| `import json <path>` | Save official connector JSON items | Yes | connector | Supports inline per-item `attachments` descriptors | +| `session *` | Persist current context | No | local state | REPL/session helper commands | + +## Item Search Behavior + +### `item find` + +Primary behavior: + +- when Local API is available, query the library-aware Zotero route: + - `/api/users/0/...` for the local user library + - `/api/groups/<libraryID>/...` for group libraries +- resolve Local API result keys back through SQLite so results always include local `itemID` and `key` + +Fallback behavior: + +- if Local API is unavailable or returns nothing useful, SQLite title search is used +- `--exact-title` always uses SQLite exact matching + +Reference behavior: + +- numeric IDs remain globally valid +- bare keys are accepted when they match exactly one library +- if a bare key matches multiple libraries, the CLI raises an ambiguity error and asks the caller to set `session use-library <id>` + +### Why `item get` and `item 
find` Are Separate + +- `item get` is precise lookup by `itemID` or `key` +- `item find` is discovery by keyword or title + +This keeps lookup stable and makes scripting more predictable. + +## Notes Model + +### `item notes` + +- entrypoint for listing note children under one paper +- returns notes only, not attachments or annotations + +### `note get` + +- reads one note record directly +- good for follow-up scripting when you already have a note key from `item notes` + +### `note add` + +- only child notes are supported in this harness version +- standalone notes are intentionally left out +- `text` and `markdown` are converted to safe HTML before submit +- `html` is accepted as-is + +## LLM and Analysis Model + +### Recommended Stable Interface: `item context` + +`item context` is the portable interface. It aggregates: + +- item fields +- creators and tags +- attachments +- optional notes +- optional exports such as BibTeX and CSL JSON +- optional DOI and URL links +- a prompt-ready `prompt_context` + +This is the recommended command if the caller already has its own LLM stack. + +### Optional Direct Interface: `item analyze` + +`item analyze` layers model calling on top of `item context`. + +Design choices: + +- requires `OPENAI_API_KEY` +- requires explicit `--model` +- does not hide missing-context uncertainty +- remains optional, not the only AI path + +## Experimental SQLite Write Model + +### Why It Exists + +Zotero's official HTTP surfaces cover import and note save well, but do not expose +general-purpose collection creation and arbitrary re-filing of existing items. + +This harness adds a narrow experimental SQLite write path. 
+ +### Guardrails + +- `--experimental` is mandatory +- Zotero must be closed +- the database is backed up before each write +- each operation runs in a single transaction +- rollback occurs on failure +- only the local user library is supported + +### Semantics + +`item add-to-collection`: + +- append-only +- keeps all current collection memberships + +`item move-to-collection`: + +- first ensures target membership exists +- then removes memberships from `--from` collections or from all others when `--all-other-collections` is used +- does not delete implicitly without explicit source selection + +## SQLite Tables Used + +| CLI Area | Zotero Tables | +|---|---| +| Libraries | `libraries` | +| Collections | `collections`, `collectionItems` | +| Items | `items`, `itemTypes` | +| Fields and titles | `itemData`, `itemDataValues`, `fields` | +| Creators | `creators`, `itemCreators` | +| Tags | `tags`, `itemTags` | +| Notes | `itemNotes` | +| Attachments | `itemAttachments` | +| Annotations | `itemAnnotations` | +| Searches | `savedSearches`, `savedSearchConditions` | + +## Limitations + +- `item analyze` depends on external OpenAI credentials and network access +- `search items`, `item export`, `item citation`, and `item bibliography` require Local API +- `note add` depends on connector behavior and active GUI library context +- experimental SQLite write commands are local power features, not stable Zotero APIs +- no `saveSnapshot` +- import-time PDF attachment upload is supported, but arbitrary existing-item attachment upload is still out of scope +- no word-processor integration transaction client +- no privileged JavaScript execution inside Zotero diff --git a/zotero/agent-harness/cli_anything/zotero/README.md b/zotero/agent-harness/cli_anything/zotero/README.md new file mode 100644 index 000000000..885eddde9 --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/README.md @@ -0,0 +1,498 @@ +# Zotero CLI Harness + +`cli-anything-zotero` is an agent-native CLI 
for Zotero desktop. It does not +reimplement Zotero. Instead, it composes Zotero's real local surfaces: + +- SQLite for offline, read-only inventory +- connector endpoints for GUI state and official write flows +- Local API for citation, bibliography, export, and live search + +## What It Is Good For + +This harness is designed for practical daily Zotero workflows: + +- import a RIS/BibTeX/JSON record into a chosen collection +- attach local or downloaded PDFs during the same import session +- find a paper by keyword or full title +- inspect one collection or one paper in detail +- read child notes and attachments +- add a child note to an existing item +- export BibTeX or CSL JSON for downstream tools +- generate structured context for an LLM +- optionally call OpenAI directly for analysis +- inspect, search, and export from both the local user library and group libraries +- experimentally create collections or re-file existing items when Zotero is closed + +## Requirements + +- Python 3.10+ +- Zotero desktop installed +- a local Zotero profile and data directory + +The Windows-first validation target for this harness is: + +```text +C:\Program Files\Zotero +``` + +## Install + +```bash +cd zotero/agent-harness +py -m pip install -e . +``` + +If `cli-anything-zotero` is not recognized afterwards, your Python Scripts +directory is likely not on `PATH`. You can still use: + +```bash +py -m cli_anything.zotero --help +``` + +## Local API + +Some commands require Zotero's Local API. Zotero 7 keeps it disabled by default. + +Enable it from the CLI: + +```bash +cli-anything-zotero --json app enable-local-api +cli-anything-zotero --json app enable-local-api --launch +``` + +Or manually add this to the active profile's `user.js`: + +```js +user_pref("extensions.zotero.httpServer.localAPI.enabled", true); +``` + +Then restart Zotero. 
+ +## Quickstart + +```bash +cli-anything-zotero --json app status +cli-anything-zotero --json collection list +cli-anything-zotero --json item list --limit 10 +cli-anything-zotero --json item find "embodied intelligence" --limit 5 +cli-anything-zotero +``` + +## Library Context + +- stable read, search, export, citation, bibliography, and saved-search execution work for both the local user library and group libraries +- `session use-library 1` and `session use-library L1` are equivalent and persist the normalized `libraryID` +- if a bare key matches multiple libraries, the CLI raises an ambiguity error and asks you to set `session use-library <id>` before retrying +- experimental direct SQLite write commands remain limited to the local user library + +## Workflow Guide + +### 1. Import Literature Into a Specific Collection + +Use Zotero's official connector write path. + +```bash +cli-anything-zotero --json import file .\paper.ris --collection COLLAAAA --tag review +cli-anything-zotero --json import json .\items.json --collection COLLAAAA --tag imported +cli-anything-zotero --json import file .\paper.ris --collection COLLAAAA --attachments-manifest .\attachments.json +cli-anything-zotero --json import json .\items-with-pdf.json --collection COLLAAAA --attachment-timeout 90 +``` + +`import json` supports a harness-private inline `attachments` array on each item: + +```json +[ + { + "itemType": "journalArticle", + "title": "Embodied Intelligence Paper", + "attachments": [ + { "path": "C:\\papers\\embodied.pdf", "title": "PDF" }, + { "url": "https://example.org/embodied.pdf", "title": "Publisher PDF", "delay_ms": 500 } + ] + } +] +``` + +`import file` supports the same attachment descriptors through a sidecar manifest: + +```json +[ + { + "index": 0, + "expected_title": "Embodied Intelligence Paper", + "attachments": [ + { "path": "C:\\papers\\embodied.pdf", "title": "PDF" } + ] + } +] +``` + +Attachment behavior: + +- attachments are uploaded only for items created 
in the current import session +- local files and downloaded URLs must pass PDF magic-byte validation +- duplicate attachment descriptors for the same imported item are skipped idempotently +- if metadata import succeeds but one or more attachments fail, the command returns JSON with `status: "partial_success"` and exits non-zero + +When Zotero is running, target resolution is: + +1. explicit `--collection` +2. current session collection +3. current GUI-selected collection +4. user library + +Backend: + +- connector + +Zotero must be running: + +- yes + +### 2. Find a Collection + +```bash +cli-anything-zotero --json collection find "robotics" +``` + +Use this when you remember a folder name but not its key or ID. + +Backend: + +- SQLite + +Zotero must be running: + +- no + +### 3. Find a Paper by Keyword or Full Title + +```bash +cli-anything-zotero --json item find "foundation model" +cli-anything-zotero --json item find "A Very Specific Paper Title" --exact-title +cli-anything-zotero --json item find "vision" --collection COLLAAAA --limit 10 +``` + +Behavior: + +- default mode prefers Local API search and falls back to SQLite title search when needed +- when Local API is used, the harness automatically switches between `/api/users/0/...` and `/api/groups/<libraryID>/...` +- `--exact-title` forces exact title matching through SQLite +- results include `itemID` and `key`, so you can pass them directly to `item get` +- if a bare key is duplicated across libraries, set `session use-library <id>` to disambiguate follow-up commands + +Backend: + +- Local API first +- SQLite fallback + +Zotero must be running: + +- recommended for keyword search +- not required for exact-title search + +### 4. 
Read a Collection or One Item + +```bash +cli-anything-zotero --json collection items COLLAAAA +cli-anything-zotero --json item get REG12345 +cli-anything-zotero --json item attachments REG12345 +cli-anything-zotero --json item file REG12345 +``` + +Typical use: + +- read the papers under a collection +- inspect a single paper's fields, creators, and tags +- resolve the local PDF path for downstream processing + +Backend: + +- SQLite + +Zotero must be running: + +- no + +### 5. Read Notes for a Paper + +```bash +cli-anything-zotero --json item notes REG12345 +cli-anything-zotero --json note get NOTEKEY +``` + +Responsibilities: + +- `item notes` lists only child notes for the paper +- `note get` reads the full content of one note by item ID or key + +Backend: + +- SQLite + +Zotero must be running: + +- no + +### 6. Add a Child Note to a Paper + +```bash +cli-anything-zotero --json note add REG12345 --text "Key takeaway: ..." +cli-anything-zotero --json note add REG12345 --file .\summary.md --format markdown +``` + +Behavior: + +- always creates a child note attached to the specified paper +- `text` and `markdown` are converted to safe HTML before save +- `html` is passed through as-is + +Important connector note: + +- Zotero must be running +- the Zotero UI must currently be on the same library as the parent item + +Backend: + +- connector `/connector/saveItems` + +### 7. Export BibTeX, CSL JSON, and Citations + +```bash +cli-anything-zotero --json item export REG12345 --format bibtex +cli-anything-zotero --json item export REG12345 --format csljson +cli-anything-zotero --json item citation REG12345 --style apa --locale en-US +cli-anything-zotero --json item bibliography REG12345 --style apa --locale en-US +``` + +These commands automatically use the correct Local API scope for user and group libraries. 
+ +Supported export formats: + +- `ris` +- `bibtex` +- `biblatex` +- `csljson` +- `csv` +- `mods` +- `refer` + +Backend: + +- Local API + +Zotero must be running: + +- yes + +### 8. Produce LLM-Ready Context + +```bash +cli-anything-zotero --json item context REG12345 --include-notes --include-links --include-bibtex +``` + +This command is the stable, model-independent path for AI workflows. It returns: + +- item metadata and fields +- attachments and local file paths +- optional notes +- optional BibTeX and CSL JSON +- optional DOI and URL links +- a `prompt_context` text block you can send to any LLM + +Backend: + +- SQLite +- optional Local API when BibTeX or CSL JSON export is requested + +### 9. Ask OpenAI to Analyze a Paper + +```bash +set OPENAI_API_KEY=... +cli-anything-zotero --json item analyze REG12345 --question "What is this paper's likely contribution?" --model gpt-5.4-mini --include-notes +``` + +Behavior: + +- builds the same structured context as `item context` +- adds links automatically +- sends the question and context to the OpenAI Responses API + +Requirements: + +- `OPENAI_API_KEY` +- explicit `--model` + +Recommended usage: + +- use `item context` when you want portable data +- use `item analyze` when you want an in-CLI answer + +### 10. Experimental Collection Refactoring + +These commands write directly to `zotero.sqlite` and are intentionally marked +experimental. 
+ +```bash +cli-anything-zotero --json collection create "New Topic" --parent COLLAAAA --experimental +cli-anything-zotero --json item add-to-collection REG12345 COLLBBBB --experimental +cli-anything-zotero --json item move-to-collection REG67890 COLLAAAA --from COLLBBBB --experimental +cli-anything-zotero --json item move-to-collection REG67890 COLLAAAA --all-other-collections --experimental +``` + +Safety rules: + +- Zotero must be closed +- `--experimental` is mandatory +- the harness automatically backs up `zotero.sqlite` before the write +- commands run in a single transaction and roll back on failure +- only the local user library is supported for these experimental commands + +Semantics: + +- `add-to-collection` only appends a collection membership +- `move-to-collection` adds the target collection and removes memberships from the specified sources + +Backend: + +- experimental direct SQLite writes + +## Command Groups + +### `app` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `status` | Show executable, profile, data dir, SQLite path, connector state, and Local API state | No | discovery + probes | +| `version` | Show package version and Zotero version | No | discovery | +| `launch` | Start Zotero and wait for liveness | No | executable + connector | +| `enable-local-api` | Enable the Local API in `user.js`, optionally launch and verify | No | profile prefs | +| `ping` | Check `/connector/ping` | Yes | connector | + +### `collection` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `list` | List collections in the current library | No | SQLite | +| `find <query>` | Find collections by name | No | SQLite | +| `tree` | Show nested collection structure | No | SQLite | +| `get <ref>` | Read one collection by ID or key | No | SQLite | +| `items <ref>` | Read the items under one collection | No | SQLite | +| `use-selected` | Persist the currently selected GUI collection | Yes | connector | 
+| `create <name> --experimental` | Create a collection locally with backup protection | No, Zotero must be closed | experimental SQLite | + +### `item` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `list` | List top-level regular items | No | SQLite | +| `find <query>` | Find papers by keyword or full title | Recommended | Local API + SQLite | +| `get <ref>` | Read a single item by ID or key | No | SQLite | +| `children <ref>` | Read notes, attachments, and annotations under an item | No | SQLite | +| `notes <ref>` | Read only child notes under an item | No | SQLite | +| `attachments <ref>` | Read attachment metadata and resolved paths | No | SQLite | +| `file <ref>` | Resolve one attachment file path | No | SQLite | +| `export <ref> --format <fmt>` | Export one item through Zotero translators | Yes | Local API | +| `citation <ref>` | Render one citation | Yes | Local API | +| `bibliography <ref>` | Render one bibliography entry | Yes | Local API | +| `context <ref>` | Build structured, LLM-ready context | Optional | SQLite + optional Local API | +| `analyze <ref>` | Send item context to OpenAI for analysis | Yes for exports only; API key required | OpenAI + local context | +| `add-to-collection <item> <collection> --experimental` | Append a collection membership | No, Zotero must be closed | experimental SQLite | +| `move-to-collection <item> <collection> --experimental` | Move an item between collections | No, Zotero must be closed | experimental SQLite | + +### `note` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `get <ref>` | Read one note by ID or key | No | SQLite | +| `add <item-ref>` | Create a child note under an item | Yes | connector | + +### `search` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `list` | List saved searches | No | SQLite | +| `get <ref>` | Read one saved search definition | No | SQLite | +| `items <ref>` | Execute one saved 
search | Yes | Local API | + +### `tag` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `list` | List tags and item counts | No | SQLite | +| `items <tag>` | Read items carrying a tag | No | SQLite | + +### `style` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `list` | Read installed CSL styles | No | styles directory (`*.csl` files) | + +### `import` + +| Command | Purpose | Requires Zotero Running | Backend | +|---|---|---:|---| +| `file <path>` | Import RIS/BibTeX/BibLaTeX/Refer and other translator-supported text files | Yes | connector | +| `json <path>` | Save official Zotero connector item JSON | Yes | connector | + +### `session` + +`session` keeps current library, collection, item, and command history for the +REPL and one-shot commands. + +## REPL + +Run without a subcommand to enter the stateful REPL: + +```bash +cli-anything-zotero +``` + +Useful builtins: + +- `help` +- `exit` +- `current-library` +- `current-collection` +- `current-item` +- `use-library <id-or-Lid>` +- `use-collection <id-or-key>` +- `use-item <id-or-key>` +- `use-selected` +- `status` +- `history` +- `state-path` + +## Testing + +```bash +py -m pip install -e . 
+py -m pytest cli_anything/zotero/tests/test_core.py -v +py -m pytest cli_anything/zotero/tests/test_cli_entrypoint.py -v +py -m pytest cli_anything/zotero/tests/test_agent_harness.py -v +py -m pytest cli_anything/zotero/tests/test_full_e2e.py -v -s +py -m pytest cli_anything/zotero/tests/ -v --tb=no + +set CLI_ANYTHING_FORCE_INSTALLED=1 +py -m pytest cli_anything/zotero/tests/test_cli_entrypoint.py -v +py -m pytest cli_anything/zotero/tests/test_full_e2e.py -v -s +``` + +Opt-in live write tests: + +```bash +set CLI_ANYTHING_ZOTERO_ENABLE_WRITE_E2E=1 +set CLI_ANYTHING_ZOTERO_IMPORT_TARGET=<collection-key-or-id> +py -m pytest cli_anything/zotero/tests/test_full_e2e.py -v -s +``` + +## Limitations + +- `item analyze` depends on `OPENAI_API_KEY` and an explicit model name +- `search items`, `item export`, `item citation`, and `item bibliography` require Local API +- `note add` depends on connector behavior and therefore expects the Zotero UI to be on the same library as the parent item +- experimental collection write commands are intentionally not presented as stable Zotero APIs +- no `saveSnapshot` +- import-time PDF attachments are supported, but arbitrary existing-item attachment upload is still out of scope +- no word-processor integration transaction client +- no privileged JavaScript execution inside Zotero diff --git a/zotero/agent-harness/cli_anything/zotero/__init__.py b/zotero/agent-harness/cli_anything/zotero/__init__.py new file mode 100644 index 000000000..ec18772ef --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/__init__.py @@ -0,0 +1,5 @@ +"""cli-anything-zotero package.""" + +__all__ = ["__version__"] + +__version__ = "0.1.0" diff --git a/zotero/agent-harness/cli_anything/zotero/__main__.py b/zotero/agent-harness/cli_anything/zotero/__main__.py new file mode 100644 index 000000000..b3164afc0 --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/__main__.py @@ -0,0 +1,5 @@ +from cli_anything.zotero.zotero_cli import entrypoint + + 
def _creator_line(item: dict[str, Any]) -> str:
    """Return the item's creators as one comma-separated line.

    Each creator is rendered from whichever of ``firstName`` / ``lastName`` is
    present. A creator with neither falls back to its ``creatorID``. Creators
    that yield nothing usable are skipped, so the result never contains empty
    entries or a literal "None".
    """
    names: list[str] = []
    for creator in item.get("creators") or []:
        full_name = " ".join(
            part for part in (creator.get("firstName"), creator.get("lastName")) if part
        )
        if not full_name:
            fallback = creator.get("creatorID")
            full_name = "" if fallback is None else str(fallback)
        if full_name:
            names.append(full_name)
    return ", ".join(names)


def _link_payload(item: dict[str, Any]) -> dict[str, str]:
    """Collect the URL and DOI links stored in the item's ``fields``.

    Returns a dict with any of the keys ``url``, ``doi`` and ``doi_url``.
    ``doi`` is the stored value unchanged. ``doi_url`` is built from the bare
    DOI, so a value that already carries a ``doi:`` prefix or a doi.org
    resolver URL is not prefixed a second time.
    """
    fields = item.get("fields") or {}
    links: dict[str, str] = {}
    url = fields.get("url")
    doi = fields.get("DOI") or fields.get("doi")
    if url:
        links["url"] = str(url)
    if doi:
        links["doi"] = str(doi)
        bare = str(doi).strip()
        lowered = bare.lower()
        known_prefixes = (
            "https://doi.org/",
            "http://doi.org/",
            "https://dx.doi.org/",
            "http://dx.doi.org/",
            "doi:",
        )
        for prefix in known_prefixes:
            if lowered.startswith(prefix):
                bare = bare[len(prefix):].strip()
                break
        links["doi_url"] = f"https://doi.org/{bare}"
    return links


def _prompt_context(payload: dict[str, Any]) -> str:
    """Render a context payload as a plain-text block for an LLM prompt.

    ``payload`` must contain ``item``; ``links``, ``attachments``, ``notes``
    and ``exports`` are optional and each adds its own section when non-empty.
    Item fields are listed in sorted order, skipping ``title`` (already on the
    first line) and empty values. The joined text is stripped before return.
    """
    item = payload["item"]
    fields = item.get("fields") or {}
    lines = [
        f"Title: {item.get('title') or ''}",
        f"Item Key: {item.get('key') or ''}",
        f"Item Type: {item.get('typeName') or ''}",
    ]
    creator_line = _creator_line(item)
    if creator_line:
        lines.append(f"Creators: {creator_line}")
    for field_name in sorted(fields):
        if field_name == "title":
            continue
        value = fields.get(field_name)
        if value not in (None, ""):
            lines.append(f"{field_name}: {value}")

    links = payload.get("links") or {}
    if links:
        lines.append("Links:")
        for key, value in links.items():
            lines.append(f"- {key}: {value}")

    attachments = payload.get("attachments") or []
    if attachments:
        lines.append("Attachments:")
        for attachment in attachments:
            lines.append(
                f"- {attachment.get('title') or attachment.get('key')}: "
                f"{attachment.get('resolvedPath') or attachment.get('path') or '<missing>'}"
            )

    notes = payload.get("notes") or []
    if notes:
        lines.append("Notes:")
        for note in notes:
            # Fall back to an empty body so a note without text does not put
            # the literal string "None" into the prompt.
            body = note.get("noteText") or note.get("notePreview") or ""
            lines.append(f"- {note.get('title') or note.get('key')}: {body}")

    exports = payload.get("exports") or {}
    if exports:
        lines.append("Exports:")
        for fmt, content in exports.items():
            lines.append(f"[{fmt}]")
            lines.append(content)

    return "\n".join(lines).strip()
def analyze_item(
    runtime: RuntimeContext,
    ref: str | int | None,
    *,
    question: str,
    model: str,
    include_notes: bool = False,
    include_bibtex: bool = False,
    include_csljson: bool = False,
    session: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Answer a question about one item by sending its context to OpenAI.

    The item context is built with ``build_item_context`` (links are always
    included) and passed, together with the stripped question, to
    ``openai_api.create_text_response``.

    Returns a dict with ``itemKey``, ``model``, ``question``, ``answer``,
    ``responseID`` and the full ``context`` payload.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is unset or blank.
    """
    token = os.environ.get("OPENAI_API_KEY", "").strip()
    if not token:
        raise RuntimeError("OPENAI_API_KEY is not set. Use `item context` for model-independent output or configure the API key.")

    # Links are forced on; the other sections follow the caller's flags.
    context = build_item_context(
        runtime,
        ref,
        include_notes=include_notes,
        include_bibtex=include_bibtex,
        include_csljson=include_csljson,
        include_links=True,
        session=session,
    )

    prompt = "".join(
        [
            "Use the Zotero item context below to answer the user's question.\n\n",
            f"Question:\n{question.strip()}\n\n",
            f"Context:\n{context['prompt_context']}",
        ]
    )
    system_instructions = (
        "You are analyzing a Zotero bibliographic record. Stay grounded in the provided context. "
        "If the context is missing an answer, say so explicitly."
    )
    reply = openai_api.create_text_response(
        api_key=token,
        model=model,
        instructions=system_instructions,
        input_text=prompt,
    )

    result: dict[str, Any] = {
        "itemKey": context["item"]["key"],
        "model": model,
        "question": question,
        "answer": reply["answer"],
        "responseID": reply["response_id"],
        "context": context,
    }
    return result
def get_collection(runtime: RuntimeContext, ref: str | int | None, session: dict[str, Any] | None = None) -> dict[str, Any]:
    """Resolve one collection from an explicit reference or the session.

    When ``ref`` is ``None`` the session's ``current_collection`` is used.
    The lookup is scoped to the session's ``current_library`` when one is set.

    Raises:
        RuntimeError: if no reference is available or nothing matches it.
    """
    state = session if session else {}
    if ref is not None:
        wanted = ref
    else:
        wanted = state.get("current_collection")
    if wanted is None:
        raise RuntimeError("Collection reference required or set it in session first")

    db_path = _require_sqlite(runtime)
    library_scope = resolve_library_id(runtime, state.get("current_library"))
    match = zotero_sqlite.resolve_collection(db_path, wanted, library_id=library_scope)
    if match:
        return match
    raise RuntimeError(f"Collection not found: {wanted}")
def find_items(
    runtime: RuntimeContext,
    query: str,
    *,
    collection_ref: str | None = None,
    limit: int = 20,
    exact_title: bool = False,
    session: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
    """Find items by query, optionally restricted to one collection.

    Unless ``exact_title`` is set, the Local API is queried first when it is
    available; each returned key is resolved against SQLite and any hits are
    returned (at most ``limit``). If that path is skipped or resolves nothing,
    the search falls back to ``zotero_sqlite.find_items_by_title``.
    """
    db_path = _require_sqlite(runtime)
    scoped = get_collection(runtime, collection_ref, session=session) if collection_ref else None
    if scoped:
        library_id = int(scoped["libraryID"])
    else:
        library_id = _default_library(runtime, session)

    if not exact_title and runtime.local_api_available:
        api_root = local_api_scope(runtime, library_id)
        if scoped:
            endpoint = f"{api_root}/collections/{scoped['key']}/items/top"
        else:
            endpoint = f"{api_root}/items/top"
        response = zotero_http.local_api_get_json(
            runtime.environment.port,
            endpoint,
            params={"format": "json", "q": query, "limit": limit},
        )
        # Anything other than a list of dicts carrying a "key" is ignored.
        records = response if isinstance(response, list) else []
        keys = [record.get("key") for record in records if isinstance(record, dict)]
        candidates = (
            zotero_sqlite.resolve_item(db_path, key, library_id=library_id)
            for key in keys
            if key
        )
        matches = [row for row in candidates if row]
        if matches:
            return matches[:limit]

    collection_id = None
    if scoped:
        collection_id = int(scoped["collectionID"])
    return zotero_sqlite.find_items_by_title(
        db_path,
        query,
        library_id=library_id,
        collection_id=collection_id,
        limit=limit,
        exact_title=exact_title,
    )
def search_items(runtime: RuntimeContext, ref: str | int | None, session: dict[str, Any] | None = None) -> Any:
    """Run one saved search through the Local API and return its JSON result.

    The saved search is resolved with ``get_search``; the request goes to the
    Local API scope of the library that owns it.

    Raises:
        RuntimeError: if the Local API is not available.
    """
    if not runtime.local_api_available:
        raise RuntimeError("search items requires the Zotero Local API to be running and enabled")

    saved = get_search(runtime, ref, session=session)
    api_root = local_api_scope(runtime, int(saved["libraryID"]))
    port = runtime.environment.port
    endpoint = f"{api_root}/searches/{saved['key']}/items"
    return zotero_http.local_api_get_json(port, endpoint, params={"format": "json"})
def list_styles(runtime: RuntimeContext) -> list[dict[str, Any]]:
    """List the CSL style files found in the environment's styles directory.

    Every ``*.csl`` entry is reported, in sorted path order, as a dict with
    ``path``, ``id``, ``title`` and ``valid``. ``id`` and ``title`` come from
    the first non-empty ``<id>`` / ``<title>`` elements in the document, with
    the XML namespace ignored; ``title`` falls back to the file stem.

    An entry that cannot be parsed is reported with ``valid: False`` rather
    than aborting the listing. This covers malformed XML and also entries that
    cannot be read at all, such as a directory named ``*.csl``.

    Returns an empty list when the styles directory does not exist.
    """
    styles_dir = runtime.environment.styles_dir
    if not styles_dir.exists():
        return []
    styles: list[dict[str, Any]] = []
    for path in sorted(styles_dir.glob("*.csl")):
        try:
            root = ET.parse(path).getroot()
        except (ET.ParseError, OSError):
            # One unreadable or malformed entry must not hide the other styles.
            styles.append({"path": str(path), "id": None, "title": path.stem, "valid": False})
            continue
        style_id = None
        title = None
        for element in root.iter():
            # Tags look like "{namespace}name"; compare on the local name only.
            tag = element.tag.split("}", 1)[-1]
            if tag == "id" and style_id is None:
                style_id = (element.text or "").strip() or None
            elif tag == "title" and title is None:
                title = (element.text or "").strip() or None
            if style_id is not None and title is not None:
                break  # both found; the rest of the tree is irrelevant
        styles.append({"path": str(path), "id": style_id, "title": title or path.stem, "valid": True})
    return styles
None, executable: str | None = None) -> RuntimeContext: + environment = zotero_paths.build_environment( + explicit_data_dir=data_dir, + explicit_profile_dir=profile_dir, + explicit_executable=executable, + ) + connector_available, connector_message = zotero_http.connector_is_available(environment.port) + local_api_available, local_api_message = zotero_http.local_api_is_available(environment.port) + return RuntimeContext( + environment=environment, + backend=backend, + connector_available=connector_available, + connector_message=connector_message, + local_api_available=local_api_available, + local_api_message=local_api_message, + ) + + +def launch_zotero(runtime: RuntimeContext, wait_timeout: int = 30) -> dict[str, Any]: + executable = runtime.environment.executable + if executable is None: + raise RuntimeError("Zotero executable could not be resolved") + if not executable.exists(): + raise FileNotFoundError(f"Zotero executable not found: {executable}") + + process = subprocess.Popen([str(executable)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + connector_ready = zotero_http.wait_for_endpoint( + runtime.environment.port, + "/connector/ping", + timeout=wait_timeout, + ready_statuses=(200,), + ) + local_api_ready = False + if runtime.environment.local_api_enabled_configured: + local_api_ready = zotero_http.wait_for_endpoint( + runtime.environment.port, + "/api/", + timeout=wait_timeout, + headers={"Zotero-API-Version": zotero_http.LOCAL_API_VERSION}, + ready_statuses=(200,), + ) + return { + "action": "launch", + "pid": process.pid, + "connector_ready": connector_ready, + "local_api_ready": local_api_ready, + "wait_timeout": wait_timeout, + "executable": str(executable), + } + + +def ensure_live_api_enabled(profile_dir: Optional[str] = None) -> Optional[str]: + environment = zotero_paths.build_environment(explicit_profile_dir=profile_dir) + path = zotero_paths.ensure_local_api_enabled(environment.profile_dir) + return str(path) if path else None diff --git 
def _require_user_library(runtime: RuntimeContext, library_id: int) -> None:
    """Raise unless ``library_id`` names an existing library of type "user".

    Raises:
        RuntimeError: if the library is not found or its type is not "user".
    """
    record = zotero_sqlite.resolve_library(runtime.environment.sqlite_path, library_id)
    if not record:
        raise RuntimeError(f"Library not found: {library_id}")
    if record["type"] == "user":
        return
    raise RuntimeError("Experimental SQLite write commands currently support only the local user library")
def create_collection(
    runtime: RuntimeContext,
    name: str,
    *,
    parent_ref: str | None = None,
    library_ref: str | None = None,
    session: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Create a collection by writing directly to the Zotero SQLite database.

    With ``parent_ref`` the new collection is nested under that collection and
    inherits its library; otherwise the library comes from ``library_ref``,
    the session, or the default library (via ``_user_library_id``).

    Only a user library is accepted as the destination. That check is applied
    to the parent's library as well, so a parent that lives in a group library
    is rejected instead of being written to.

    Returns the created record with ``action`` and ``experimental`` added.

    Raises:
        RuntimeError: if Zotero is running, the parent cannot be found, the
            destination is not a user library, or ``library_ref`` names a
            different library than the parent's.
    """
    _require_offline(runtime)
    sqlite_path = runtime.environment.sqlite_path

    parent = None
    if parent_ref:
        parent = zotero_sqlite.resolve_collection(
            sqlite_path,
            parent_ref,
            library_id=_session_library_id(session),
        )
        if not parent:
            raise RuntimeError(f"Parent collection not found: {parent_ref}")

    if parent:
        library_id = int(parent["libraryID"])
        # The parent decides the destination, so it needs the same
        # user-library guard that the no-parent path gets from
        # _user_library_id.
        _require_user_library(runtime, library_id)
        if library_ref is not None and library_id != _user_library_id(runtime, library_ref, session=session):
            raise RuntimeError("Parent collection and explicit library do not match")
    else:
        library_id = _user_library_id(runtime, library_ref, session=session)

    created = zotero_sqlite.create_collection_record(
        sqlite_path,
        name=name,
        library_id=library_id,
        parent_collection_id=int(parent["collectionID"]) if parent else None,
    )
    created["action"] = "collection_create"
    created["experimental"] = True
    return created
def move_item_to_collection(
    runtime: RuntimeContext,
    item_ref: str,
    collection_ref: str,
    *,
    from_refs: list[str] | tuple[str, ...] | None = None,
    all_other_collections: bool = False,
    session: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Add a top-level item to a target collection and drop source memberships.

    The sources are either every collection the item currently belongs to
    (``all_other_collections=True``) or the collections named in
    ``from_refs``. In both cases the target itself is never treated as a
    source, and duplicate sources are collapsed, so "moving" an item into a
    collection it is being moved from cannot list the target for removal.

    Returns the record-level result with ``action``, ``experimental``,
    ``itemKey``, ``targetCollectionKey`` and ``sourceCollectionIDs`` added.

    Raises:
        RuntimeError: if Zotero is running, no source selection is given, the
            item or a collection cannot be found, the item is a child item,
            the item is not in a user library, or item and target are in
            different libraries.
    """
    _require_offline(runtime)
    if not from_refs and not all_other_collections:
        raise RuntimeError("Provide `from_refs` or set `all_other_collections=True`")

    sqlite_path = runtime.environment.sqlite_path
    library_id = _session_library_id(session)

    item = zotero_sqlite.resolve_item(sqlite_path, item_ref, library_id=library_id)
    if not item:
        raise RuntimeError(f"Item not found: {item_ref}")
    if item.get("parentItemID") is not None:
        raise RuntimeError("Only top-level items can be moved directly between collections")
    _require_user_library(runtime, int(item["libraryID"]))

    target = zotero_sqlite.resolve_collection(sqlite_path, collection_ref, library_id=library_id)
    if not target:
        raise RuntimeError(f"Target collection not found: {collection_ref}")
    if int(item["libraryID"]) != int(target["libraryID"]):
        raise RuntimeError("Item and target collection must belong to the same library")
    target_id = int(target["collectionID"])

    if all_other_collections:
        memberships = zotero_sqlite.fetch_item_collections(sqlite_path, item["itemID"])
        candidate_ids = [int(collection["collectionID"]) for collection in memberships]
    else:
        candidate_ids = []
        for ref in from_refs or []:
            collection = zotero_sqlite.resolve_collection(sqlite_path, ref, library_id=library_id)
            if not collection:
                raise RuntimeError(f"Source collection not found: {ref}")
            candidate_ids.append(int(collection["collectionID"]))

    # Order-preserving de-duplication. The target is excluded on both paths
    # so it can never be added and removed in the same operation.
    source_collection_ids = list(
        dict.fromkeys(collection_id for collection_id in candidate_ids if collection_id != target_id)
    )

    result = zotero_sqlite.move_item_between_collections_record(
        sqlite_path,
        item_id=int(item["itemID"]),
        target_collection_id=target_id,
        source_collection_ids=source_collection_ids,
    )
    result.update(
        {
            "action": "item_move_to_collection",
            "experimental": True,
            "itemKey": item["key"],
            "targetCollectionKey": target["key"],
            "sourceCollectionIDs": source_collection_ids,
        }
    )
    return result
path.read_text(encoding=encoding) + except UnicodeDecodeError: + continue + return path.read_text(errors="replace") + + +def _read_json_items(path: Path) -> list[dict[str, Any]]: + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise RuntimeError(f"Invalid JSON import file: {path}: {exc}") from exc + if isinstance(payload, dict): + payload = payload.get("items") + if not isinstance(payload, list): + raise RuntimeError("JSON import expects an array of official Zotero connector item objects") + normalized: list[dict[str, Any]] = [] + for index, item in enumerate(payload, start=1): + if not isinstance(item, dict): + raise RuntimeError(f"JSON import item {index} is not an object") + copied = dict(item) + copied.setdefault("id", f"cli-anything-zotero-{index}") + normalized.append(copied) + return normalized + + +def _read_json_payload(path: Path, *, label: str) -> Any: + try: + return json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise RuntimeError(f"Invalid JSON {label}: {path}: {exc}") from exc + + +def _default_user_library_target(runtime: RuntimeContext) -> str: + sqlite_path = runtime.environment.sqlite_path + if sqlite_path.exists(): + library_id = zotero_sqlite.default_library_id(sqlite_path) + if library_id is not None: + return f"L{library_id}" + return "L1" + + +def _session_library_id(session: dict[str, Any] | None) -> int | None: + session = session or {} + current_library = session.get("current_library") + if current_library is None: + return None + return zotero_sqlite.normalize_library_ref(current_library) + + +def _resolve_target(runtime: RuntimeContext, collection_ref: str | None, session: dict[str, Any] | None = None) -> dict[str, Any]: + session = session or {} + session_library_id = _session_library_id(session) + if collection_ref: + if _TREE_VIEW_ID_RE.match(collection_ref): + kind = "library" if collection_ref.startswith("L") else "collection" + return 
{"treeViewID": collection_ref, "source": "explicit", "kind": kind} + collection = zotero_sqlite.resolve_collection( + runtime.environment.sqlite_path, + collection_ref, + library_id=session_library_id, + ) + if not collection: + raise RuntimeError(f"Collection not found: {collection_ref}") + return { + "treeViewID": f"C{collection['collectionID']}", + "source": "explicit", + "kind": "collection", + "collectionID": collection["collectionID"], + "collectionKey": collection["key"], + "collectionName": collection["collectionName"], + "libraryID": collection["libraryID"], + } + + current_collection = session.get("current_collection") + if current_collection: + if _TREE_VIEW_ID_RE.match(str(current_collection)): + kind = "library" if str(current_collection).startswith("L") else "collection" + return {"treeViewID": str(current_collection), "source": "session", "kind": kind} + collection = zotero_sqlite.resolve_collection( + runtime.environment.sqlite_path, + current_collection, + library_id=session_library_id, + ) + if collection: + return { + "treeViewID": f"C{collection['collectionID']}", + "source": "session", + "kind": "collection", + "collectionID": collection["collectionID"], + "collectionKey": collection["key"], + "collectionName": collection["collectionName"], + "libraryID": collection["libraryID"], + } + + if runtime.connector_available: + selected = zotero_http.get_selected_collection(runtime.environment.port) + if selected.get("id") is not None: + return { + "treeViewID": f"C{selected['id']}", + "source": "selected", + "kind": "collection", + "collectionID": selected["id"], + "collectionName": selected.get("name"), + "libraryID": selected.get("libraryID"), + "libraryName": selected.get("libraryName"), + } + return { + "treeViewID": f"L{selected['libraryID']}", + "source": "selected", + "kind": "library", + "libraryID": selected.get("libraryID"), + "libraryName": selected.get("libraryName"), + } + + return { + "treeViewID": _default_user_library_target(runtime), 
+ "source": "user_library", + "kind": "library", + } + + +def _normalize_tags(tags: list[str] | tuple[str, ...]) -> list[str]: + return [tag.strip() for tag in tags if tag and tag.strip()] + + +def _session_id(prefix: str) -> str: + return f"{prefix}-{uuid.uuid4().hex}" + + +def _normalize_attachment_int(value: Any, *, name: str, minimum: int) -> int: + try: + normalized = int(value) + except (TypeError, ValueError) as exc: + raise RuntimeError(f"Attachment `{name}` must be an integer") from exc + if normalized < minimum: + comparator = "greater than or equal to" if minimum == 0 else f"at least {minimum}" + raise RuntimeError(f"Attachment `{name}` must be {comparator}") + return normalized + + +def _normalize_attachment_descriptor( + raw: Any, + *, + index_label: str, + attachment_label: str, + default_delay_ms: int, + default_timeout: int, +) -> dict[str, Any]: + if not isinstance(raw, dict): + raise RuntimeError(f"{index_label} {attachment_label} must be an object") + has_path = "path" in raw and raw.get("path") not in (None, "") + has_url = "url" in raw and raw.get("url") not in (None, "") + if has_path == has_url: + raise RuntimeError(f"{index_label} {attachment_label} must include exactly one of `path` or `url`") + title = str(raw.get("title") or "PDF").strip() or "PDF" + delay_ms = _normalize_attachment_int(raw.get("delay_ms", default_delay_ms), name="delay_ms", minimum=0) + timeout = _normalize_attachment_int(raw.get("timeout", default_timeout), name="timeout", minimum=1) + if has_path: + source = str(raw["path"]).strip() + if not source: + raise RuntimeError(f"{index_label} {attachment_label} path must not be empty") + return { + "source_type": "file", + "source": source, + "title": title, + "delay_ms": delay_ms, + "timeout": timeout, + } + source = str(raw["url"]).strip() + if not source: + raise RuntimeError(f"{index_label} {attachment_label} url must not be empty") + return { + "source_type": "url", + "source": source, + "title": title, + "delay_ms": 
delay_ms, + "timeout": timeout, + } + + +def _extract_inline_attachment_plans( + items: list[dict[str, Any]], + *, + default_delay_ms: int, + default_timeout: int, +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + stripped_items: list[dict[str, Any]] = [] + plans: list[dict[str, Any]] = [] + for index, item in enumerate(items): + copied = dict(item) + raw_attachments = copied.pop("attachments", []) + if raw_attachments in (None, []): + stripped_items.append(copied) + continue + if not isinstance(raw_attachments, list): + raise RuntimeError(f"JSON import item {index + 1} attachments must be an array") + normalized = [ + _normalize_attachment_descriptor( + descriptor, + index_label=f"JSON import item {index + 1}", + attachment_label=f"attachment {attachment_index + 1}", + default_delay_ms=default_delay_ms, + default_timeout=default_timeout, + ) + for attachment_index, descriptor in enumerate(raw_attachments) + ] + plans.append({"index": index, "attachments": normalized}) + stripped_items.append(copied) + return stripped_items, plans + + +def _read_attachment_manifest( + path: Path, + *, + default_delay_ms: int, + default_timeout: int, +) -> list[dict[str, Any]]: + payload = _read_json_payload(path, label="attachment manifest") + if not isinstance(payload, list): + raise RuntimeError("Attachment manifest expects an array of {index, attachments} objects") + manifest: list[dict[str, Any]] = [] + seen_indexes: set[int] = set() + for entry_index, entry in enumerate(payload, start=1): + label = f"manifest entry {entry_index}" + if not isinstance(entry, dict): + raise RuntimeError(f"{label} must be an object") + if "index" not in entry: + raise RuntimeError(f"{label} is missing required `index`") + index = _normalize_attachment_int(entry["index"], name="index", minimum=0) + if index in seen_indexes: + raise RuntimeError(f"{label} reuses import index {index}") + seen_indexes.add(index) + attachments = entry.get("attachments") + if not isinstance(attachments, list): 
+ raise RuntimeError(f"{label} attachments must be an array") + normalized = [ + _normalize_attachment_descriptor( + descriptor, + index_label=label, + attachment_label=f"attachment {attachment_index + 1}", + default_delay_ms=default_delay_ms, + default_timeout=default_timeout, + ) + for attachment_index, descriptor in enumerate(attachments) + ] + expected_title = entry.get("expected_title") + if expected_title is not None and not isinstance(expected_title, str): + raise RuntimeError(f"{label} expected_title must be a string") + manifest.append( + { + "index": index, + "expected_title": expected_title, + "attachments": normalized, + } + ) + return manifest + + +def _item_title(item: dict[str, Any]) -> str | None: + for field in ("title", "bookTitle", "publicationTitle"): + value = item.get(field) + if value: + return str(value) + return None + + +def _normalize_url_for_dedupe(url: str) -> str: + parsed = urllib.parse.urlsplit(url.strip()) + normalized_path = parsed.path or "/" + return urllib.parse.urlunsplit((parsed.scheme.lower(), parsed.netloc.lower(), normalized_path, parsed.query, "")) + + +def _attachment_result( + *, + item_index: int, + parent_connector_id: Any, + descriptor: dict[str, Any], + status: str, + error: str | None = None, +) -> dict[str, Any]: + payload = { + "item_index": item_index, + "parent_connector_id": parent_connector_id, + "source_type": descriptor["source_type"], + "source": descriptor["source"], + "title": descriptor["title"], + "status": status, + } + if error is not None: + payload["error"] = error + return payload + + +def _attachment_summary(results: list[dict[str, Any]]) -> dict[str, Any]: + return { + "planned_count": len(results), + "created_count": sum(1 for result in results if result["status"] == _ATTACHMENT_RESULT_CREATED), + "failed_count": sum(1 for result in results if result["status"] == _ATTACHMENT_RESULT_FAILED), + "skipped_count": sum(1 for result in results if result["status"] == _ATTACHMENT_RESULT_SKIPPED), + } + + 
+def _ensure_pdf_bytes(content: bytes, *, source: str) -> None: + if not content.startswith(_PDF_MAGIC): + raise RuntimeError(f"Attachment source is not a PDF: {source}") + + +def _read_local_pdf(path_text: str) -> tuple[bytes, str]: + path = Path(path_text).expanduser() + if not path.exists(): + raise FileNotFoundError(f"Attachment file not found: {path}") + resolved = path.resolve() + content = resolved.read_bytes() + _ensure_pdf_bytes(content, source=str(resolved)) + return content, resolved.as_uri() + + +def _download_remote_pdf(url: str, *, delay_ms: int, timeout: int) -> bytes: + if delay_ms: + time.sleep(delay_ms / 1000) + request = urllib.request.Request(url, headers={"Accept": "application/pdf,application/octet-stream;q=0.9,*/*;q=0.1"}) + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + status = getattr(response, "status", response.getcode()) + if int(status) != 200: + raise RuntimeError(f"Attachment download returned HTTP {status}: {url}") + content = response.read() + except urllib.error.HTTPError as exc: + raise RuntimeError(f"Attachment download returned HTTP {exc.code}: {url}") from exc + except urllib.error.URLError as exc: + raise RuntimeError(f"Attachment download failed for {url}: {exc.reason}") from exc + _ensure_pdf_bytes(content, source=url) + return content + + +def _perform_attachment_upload( + runtime: RuntimeContext, + *, + session_id: str, + connector_items: list[dict[str, Any]], + plans: list[dict[str, Any]], +) -> tuple[dict[str, Any], list[dict[str, Any]]]: + results: list[dict[str, Any]] = [] + seen_by_item: dict[str, dict[str, set[str]]] = {} + for plan in plans: + item_index = int(plan["index"]) + attachments = list(plan.get("attachments") or []) + imported_item = connector_items[item_index] if 0 <= item_index < len(connector_items) else None + expected_title = plan.get("expected_title") + if imported_item is None: + message = f"Import returned no item at index {item_index}" + results.extend( + 
_attachment_result( + item_index=item_index, + parent_connector_id=None, + descriptor=descriptor, + status=_ATTACHMENT_RESULT_FAILED, + error=message, + ) + for descriptor in attachments + ) + continue + imported_title = _item_title(imported_item) + if expected_title is not None and imported_title != expected_title: + message = ( + f"Imported item title mismatch at index {item_index}: " + f"expected {expected_title!r}, got {imported_title!r}" + ) + results.extend( + _attachment_result( + item_index=item_index, + parent_connector_id=imported_item.get("id"), + descriptor=descriptor, + status=_ATTACHMENT_RESULT_FAILED, + error=message, + ) + for descriptor in attachments + ) + continue + parent_connector_id = imported_item.get("id") + if not parent_connector_id: + message = f"Imported item at index {item_index} did not include a connector id" + results.extend( + _attachment_result( + item_index=item_index, + parent_connector_id=None, + descriptor=descriptor, + status=_ATTACHMENT_RESULT_FAILED, + error=message, + ) + for descriptor in attachments + ) + continue + + dedupe_state = seen_by_item.setdefault( + str(parent_connector_id), + {"paths": set(), "urls": set(), "hashes": set()}, + ) + for descriptor in attachments: + try: + if descriptor["source_type"] == "file": + canonical_path = str(Path(descriptor["source"]).expanduser().resolve()) + if canonical_path in dedupe_state["paths"]: + results.append( + _attachment_result( + item_index=item_index, + parent_connector_id=parent_connector_id, + descriptor=descriptor, + status=_ATTACHMENT_RESULT_SKIPPED, + ) + ) + continue + content, metadata_url = _read_local_pdf(descriptor["source"]) + else: + normalized_url = _normalize_url_for_dedupe(descriptor["source"]) + if normalized_url in dedupe_state["urls"]: + results.append( + _attachment_result( + item_index=item_index, + parent_connector_id=parent_connector_id, + descriptor=descriptor, + status=_ATTACHMENT_RESULT_SKIPPED, + ) + ) + continue + content = _download_remote_pdf( 
+ descriptor["source"], + delay_ms=int(descriptor["delay_ms"]), + timeout=int(descriptor["timeout"]), + ) + metadata_url = descriptor["source"] + + content_hash = hashlib.sha256(content).hexdigest() + if content_hash in dedupe_state["hashes"]: + results.append( + _attachment_result( + item_index=item_index, + parent_connector_id=parent_connector_id, + descriptor=descriptor, + status=_ATTACHMENT_RESULT_SKIPPED, + ) + ) + continue + + zotero_http.connector_save_attachment( + runtime.environment.port, + session_id=session_id, + parent_item_id=parent_connector_id, + title=descriptor["title"], + url=metadata_url, + content=content, + timeout=int(descriptor["timeout"]), + ) + dedupe_state["hashes"].add(content_hash) + if descriptor["source_type"] == "file": + dedupe_state["paths"].add(canonical_path) + else: + dedupe_state["urls"].add(normalized_url) + results.append( + _attachment_result( + item_index=item_index, + parent_connector_id=parent_connector_id, + descriptor=descriptor, + status=_ATTACHMENT_RESULT_CREATED, + ) + ) + except Exception as exc: + results.append( + _attachment_result( + item_index=item_index, + parent_connector_id=parent_connector_id, + descriptor=descriptor, + status=_ATTACHMENT_RESULT_FAILED, + error=str(exc), + ) + ) + return _attachment_summary(results), results + + +def enable_local_api( + runtime: RuntimeContext, + *, + launch: bool = False, + wait_timeout: int = 30, +) -> dict[str, Any]: + profile_dir = runtime.environment.profile_dir + if profile_dir is None: + raise RuntimeError("Active Zotero profile could not be resolved") + before = runtime.environment.local_api_enabled_configured + written_path = runtime.environment.profile_dir / "user.js" + from cli_anything.zotero.utils import zotero_paths # local import to avoid cycle + zotero_paths.ensure_local_api_enabled(profile_dir) + payload = { + "profile_dir": str(profile_dir), + "user_js_path": str(written_path), + "already_enabled": before, + "enabled": True, + "launched": False, + 
"connector_ready": runtime.connector_available, + "local_api_ready": runtime.local_api_available, + } + if launch: + from cli_anything.zotero.core import discovery # local import to avoid cycle + refreshed = discovery.build_runtime_context( + backend=runtime.backend, + data_dir=str(runtime.environment.data_dir), + profile_dir=str(profile_dir), + executable=str(runtime.environment.executable) if runtime.environment.executable else None, + ) + launch_payload = discovery.launch_zotero(refreshed, wait_timeout=wait_timeout) + payload.update( + { + "launched": True, + "launch": launch_payload, + "connector_ready": launch_payload["connector_ready"], + "local_api_ready": launch_payload["local_api_ready"], + } + ) + return payload + + +def import_file( + runtime: RuntimeContext, + path: str | Path, + *, + collection_ref: str | None = None, + tags: list[str] | tuple[str, ...] = (), + session: dict[str, Any] | None = None, + attachments_manifest: str | Path | None = None, + attachment_delay_ms: int = 0, + attachment_timeout: int = 60, +) -> dict[str, Any]: + _require_connector(runtime) + source_path = Path(path).expanduser() + if not source_path.exists(): + raise FileNotFoundError(f"Import file not found: {source_path}") + content = _read_text_file(source_path) + manifest_path = Path(attachments_manifest).expanduser() if attachments_manifest is not None else None + plans = ( + _read_attachment_manifest( + manifest_path, + default_delay_ms=attachment_delay_ms, + default_timeout=attachment_timeout, + ) + if manifest_path is not None + else [] + ) + session_id = _session_id("import-file") + imported = zotero_http.connector_import_text(runtime.environment.port, content, session_id=session_id) + target = _resolve_target(runtime, collection_ref, session=session) + normalized_tags = _normalize_tags(list(tags)) + zotero_http.connector_update_session( + runtime.environment.port, + session_id=session_id, + target=target["treeViewID"], + tags=normalized_tags, + ) + attachment_summary, 
attachment_results = _perform_attachment_upload( + runtime, + session_id=session_id, + connector_items=imported, + plans=plans, + ) + return { + "action": "import_file", + "path": str(source_path), + "status": "partial_success" if attachment_summary["failed_count"] else "success", + "sessionID": session_id, + "target": target, + "tags": normalized_tags, + "imported_count": len(imported), + "items": imported, + "attachment_summary": attachment_summary, + "attachment_results": attachment_results, + } + + +def import_json( + runtime: RuntimeContext, + path: str | Path, + *, + collection_ref: str | None = None, + tags: list[str] | tuple[str, ...] = (), + session: dict[str, Any] | None = None, + attachment_delay_ms: int = 0, + attachment_timeout: int = 60, +) -> dict[str, Any]: + _require_connector(runtime) + source_path = Path(path).expanduser() + if not source_path.exists(): + raise FileNotFoundError(f"Import JSON file not found: {source_path}") + items = _read_json_items(source_path) + items, plans = _extract_inline_attachment_plans( + items, + default_delay_ms=attachment_delay_ms, + default_timeout=attachment_timeout, + ) + session_id = _session_id("import-json") + zotero_http.connector_save_items(runtime.environment.port, items, session_id=session_id) + target = _resolve_target(runtime, collection_ref, session=session) + normalized_tags = _normalize_tags(list(tags)) + zotero_http.connector_update_session( + runtime.environment.port, + session_id=session_id, + target=target["treeViewID"], + tags=normalized_tags, + ) + attachment_summary, attachment_results = _perform_attachment_upload( + runtime, + session_id=session_id, + connector_items=items, + plans=plans, + ) + return { + "action": "import_json", + "path": str(source_path), + "status": "partial_success" if attachment_summary["failed_count"] else "success", + "sessionID": session_id, + "target": target, + "tags": normalized_tags, + "submitted_count": len(items), + "items": [ + { + "id": item.get("id"), + 
"itemType": item.get("itemType"), + "title": item.get("title") or item.get("bookTitle") or item.get("publicationTitle"), + } + for item in items + ], + "attachment_summary": attachment_summary, + "attachment_results": attachment_results, + } diff --git a/zotero/agent-harness/cli_anything/zotero/core/notes.py b/zotero/agent-harness/cli_anything/zotero/core/notes.py new file mode 100644 index 000000000..b13835770 --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/core/notes.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +import html +import re +import uuid +from pathlib import Path +from typing import Any + +from cli_anything.zotero.core.catalog import get_item +from cli_anything.zotero.core.discovery import RuntimeContext +from cli_anything.zotero.utils import zotero_http, zotero_sqlite + + +def _require_connector(runtime: RuntimeContext) -> None: + if not runtime.connector_available: + raise RuntimeError(f"Zotero connector is not available: {runtime.connector_message}") + + +def get_note(runtime: RuntimeContext, ref: str | int | None, session: dict[str, Any] | None = None) -> dict[str, Any]: + if ref is None: + raise RuntimeError("Note reference required") + session = session or {} + library_id = session.get("current_library") + note = zotero_sqlite.resolve_item( + runtime.environment.sqlite_path, + ref, + library_id=zotero_sqlite.normalize_library_ref(library_id) if library_id is not None else None, + ) + if not note: + raise RuntimeError(f"Note not found: {ref}") + if note["typeName"] != "note": + raise RuntimeError(f"Item is not a note: {ref}") + return note + + +def get_item_notes(runtime: RuntimeContext, ref: str | int | None, session: dict[str, Any] | None = None) -> list[dict[str, Any]]: + parent_item = get_item(runtime, ref, session=session) + return zotero_sqlite.fetch_item_notes(runtime.environment.sqlite_path, parent_item["itemID"]) + + +def _html_paragraphs(text: str) -> str: + paragraphs = [segment.strip() for segment in 
text.replace("\r\n", "\n").replace("\r", "\n").split("\n\n") if segment.strip()] + if not paragraphs: + paragraphs = [text.strip()] + rendered = [] + for paragraph in paragraphs: + escaped = html.escape(paragraph).replace("\n", "<br/>") + rendered.append(f"<p>{escaped}</p>") + return "".join(rendered) + + +def _simple_markdown_to_safe_html(text: str) -> str: + lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n") + rendered: list[str] = [] + in_list = False + paragraph: list[str] = [] + + def flush_paragraph() -> None: + nonlocal paragraph + if not paragraph: + return + rendered.append(f"<p>{_render_markdown_inline(' '.join(paragraph))}</p>") + paragraph = [] + + def flush_list() -> None: + nonlocal in_list + if in_list: + rendered.append("</ul>") + in_list = False + + for raw_line in lines: + line = raw_line.rstrip() + if not line.strip(): + flush_paragraph() + flush_list() + continue + if line.startswith(("- ", "* ")): + flush_paragraph() + if not in_list: + rendered.append("<ul>") + in_list = True + rendered.append(f"<li>{_render_markdown_inline(line[2:].strip())}</li>") + continue + match = re.match(r"^(#{1,6})\s+(.*)$", line) + if match: + flush_paragraph() + flush_list() + level = len(match.group(1)) + rendered.append(f"<h{level}>{_render_markdown_inline(match.group(2).strip())}</h{level}>") + continue + flush_list() + paragraph.append(line.strip()) + + flush_paragraph() + flush_list() + return "".join(rendered) + + +def _render_markdown_inline(text: str) -> str: + escaped = html.escape(text) + escaped = re.sub(r"`([^`]+)`", r"<code>\1</code>", escaped) + escaped = re.sub(r"\*\*([^*]+)\*\*", r"<strong>\1</strong>", escaped) + escaped = re.sub(r"\*([^*]+)\*", r"<em>\1</em>", escaped) + return escaped + + +def _normalize_note_html(content: str, fmt: str) -> str: + fmt = fmt.lower() + if fmt == "html": + return content + if fmt == "markdown": + return _simple_markdown_to_safe_html(content) + if fmt == "text": + return _html_paragraphs(content) + 
raise RuntimeError(f"Unsupported note format: {fmt}") + + +def add_note( + runtime: RuntimeContext, + item_ref: str | int, + *, + text: str | None = None, + file_path: str | Path | None = None, + fmt: str = "text", + session: dict[str, Any] | None = None, +) -> dict[str, Any]: + _require_connector(runtime) + if (text is None and file_path is None) or (text is not None and file_path is not None): + raise RuntimeError("Provide exactly one of `text` or `file_path`") + + parent_item = get_item(runtime, item_ref, session=session) + if parent_item["typeName"] in {"note", "attachment", "annotation"}: + raise RuntimeError("Child notes can only be attached to top-level bibliographic items") + + selected = zotero_http.get_selected_collection(runtime.environment.port) + selected_library_id = selected.get("libraryID") + if selected_library_id is not None and int(selected_library_id) != int(parent_item["libraryID"]): + raise RuntimeError( + "note add requires Zotero to have the same library selected as the parent item. " + "Switch the Zotero UI to that library and retry." 
+ ) + + if file_path is not None: + content = Path(file_path).expanduser().read_text(encoding="utf-8") + else: + content = text or "" + + note_html = _normalize_note_html(content, fmt) + session_id = f"note-add-{uuid.uuid4().hex}" + zotero_http.connector_save_items( + runtime.environment.port, + [ + { + "id": session_id, + "itemType": "note", + "note": note_html, + "parentItem": parent_item["key"], + } + ], + session_id=session_id, + ) + return { + "action": "note_add", + "sessionID": session_id, + "parentItemKey": parent_item["key"], + "parentItemID": parent_item["itemID"], + "format": fmt, + "notePreview": zotero_sqlite.note_preview(note_html), + "selectedLibraryID": selected_library_id, + } diff --git a/zotero/agent-harness/cli_anything/zotero/core/rendering.py b/zotero/agent-harness/cli_anything/zotero/core/rendering.py new file mode 100644 index 000000000..1f9a56fa2 --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/core/rendering.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from typing import Any + +from cli_anything.zotero.core.catalog import get_item, local_api_scope +from cli_anything.zotero.core.discovery import RuntimeContext +from cli_anything.zotero.utils import zotero_http + + +SUPPORTED_EXPORT_FORMATS = ("ris", "bibtex", "biblatex", "csljson", "csv", "mods", "refer") + + +def _require_local_api(runtime: RuntimeContext) -> None: + if not runtime.local_api_available: + raise RuntimeError( + "Zotero Local API is not available. Start Zotero and enable " + "`extensions.zotero.httpServer.localAPI.enabled` first." 
+ ) + + +def _resolve_item(runtime: RuntimeContext, ref: str | int | None, session: dict[str, Any] | None = None) -> dict[str, Any]: + item = get_item(runtime, ref, session=session) + return item + + +def export_item(runtime: RuntimeContext, ref: str | int | None, fmt: str, session: dict[str, Any] | None = None) -> dict[str, Any]: + _require_local_api(runtime) + if fmt not in SUPPORTED_EXPORT_FORMATS: + raise RuntimeError(f"Unsupported export format: {fmt}") + item = _resolve_item(runtime, ref, session=session) + key = str(item["key"]) + scope = local_api_scope(runtime, int(item["libraryID"])) + body = zotero_http.local_api_get_text(runtime.environment.port, f"{scope}/items/{key}", params={"format": fmt}) + return {"itemKey": key, "libraryID": int(item["libraryID"]), "format": fmt, "content": body} + + +def citation_item( + runtime: RuntimeContext, + ref: str | int | None, + *, + style: str | None = None, + locale: str | None = None, + linkwrap: bool = False, + session: dict[str, Any] | None = None, +) -> dict[str, Any]: + _require_local_api(runtime) + item = _resolve_item(runtime, ref, session=session) + key = str(item["key"]) + params: dict[str, Any] = {"format": "json", "include": "citation"} + if style: + params["style"] = style + if locale: + params["locale"] = locale + if linkwrap: + params["linkwrap"] = "1" + scope = local_api_scope(runtime, int(item["libraryID"])) + payload = zotero_http.local_api_get_json(runtime.environment.port, f"{scope}/items/{key}", params=params) + citation = payload.get("citation") if isinstance(payload, dict) else (payload[0].get("citation") if payload else None) + return { + "itemKey": key, + "libraryID": int(item["libraryID"]), + "style": style, + "locale": locale, + "linkwrap": linkwrap, + "citation": citation, + } + + +def bibliography_item( + runtime: RuntimeContext, + ref: str | int | None, + *, + style: str | None = None, + locale: str | None = None, + linkwrap: bool = False, + session: dict[str, Any] | None = None, +) -> 
dict[str, Any]: + _require_local_api(runtime) + item = _resolve_item(runtime, ref, session=session) + key = str(item["key"]) + params: dict[str, Any] = {"format": "json", "include": "bib"} + if style: + params["style"] = style + if locale: + params["locale"] = locale + if linkwrap: + params["linkwrap"] = "1" + scope = local_api_scope(runtime, int(item["libraryID"])) + payload = zotero_http.local_api_get_json(runtime.environment.port, f"{scope}/items/{key}", params=params) + bibliography = payload.get("bib") if isinstance(payload, dict) else (payload[0].get("bib") if payload else None) + return { + "itemKey": key, + "libraryID": int(item["libraryID"]), + "style": style, + "locale": locale, + "linkwrap": linkwrap, + "bibliography": bibliography, + } diff --git a/zotero/agent-harness/cli_anything/zotero/core/session.py b/zotero/agent-harness/cli_anything/zotero/core/session.py new file mode 100644 index 000000000..add34488d --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/core/session.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + + +COMMAND_HISTORY_LIMIT = 50 +STATE_DIR_ENV = "CLI_ANYTHING_ZOTERO_STATE_DIR" +APP_NAME = "cli-anything-zotero" + + +def session_state_dir() -> Path: + override = os.environ.get(STATE_DIR_ENV, "").strip() + if override: + return Path(override).expanduser() + return Path.home() / ".config" / APP_NAME + + +def session_state_path() -> Path: + return session_state_dir() / "session.json" + + +def default_session_state() -> dict[str, Any]: + return {"current_library": None, "current_collection": None, "current_item": None, "command_history": []} + + +def load_session_state() -> dict[str, Any]: + path = session_state_path() + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (FileNotFoundError, json.JSONDecodeError): + return default_session_state() + history = [item for item in data.get("command_history", []) if isinstance(item, 
str)] + return { + "current_library": data.get("current_library"), + "current_collection": data.get("current_collection"), + "current_item": data.get("current_item"), + "command_history": history[-COMMAND_HISTORY_LIMIT:], + } + + +def locked_save_json(path: Path, data: dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + try: + handle = open(path, "r+", encoding="utf-8") + except FileNotFoundError: + handle = open(path, "w", encoding="utf-8") + with handle: + locked = False + try: + import fcntl + + fcntl.flock(handle.fileno(), fcntl.LOCK_EX) + locked = True + except (ImportError, OSError): + pass + try: + handle.seek(0) + handle.truncate() + json.dump(data, handle, ensure_ascii=False, indent=2) + handle.flush() + finally: + if locked: + fcntl.flock(handle.fileno(), fcntl.LOCK_UN) + + +def save_session_state(session: dict[str, Any]) -> None: + locked_save_json( + session_state_path(), + { + "current_library": session.get("current_library"), + "current_collection": session.get("current_collection"), + "current_item": session.get("current_item"), + "command_history": list(session.get("command_history", []))[-COMMAND_HISTORY_LIMIT:], + }, + ) + + +def append_command_history(command_line: str) -> None: + command_line = command_line.strip() + if not command_line: + return + session = load_session_state() + history = list(session.get("command_history", [])) + history.append(command_line) + session["command_history"] = history[-COMMAND_HISTORY_LIMIT:] + save_session_state(session) + + +def build_session_payload(session: dict[str, Any]) -> dict[str, Any]: + history = list(session.get("command_history", [])) + return { + "current_library": session.get("current_library"), + "current_collection": session.get("current_collection"), + "current_item": session.get("current_item"), + "state_path": str(session_state_path()), + "history_count": len(history), + } + + +def expand_repl_aliases_with_state(argv: list[str], session: dict[str, Any]) -> list[str]: + 
aliases = {"@library": session.get("current_library"), "@collection": session.get("current_collection"), "@item": session.get("current_item")} + expanded: list[str] = [] + for token in argv: + if token in aliases and aliases[token]: + expanded.append(str(aliases[token])) + else: + expanded.append(token) + return expanded diff --git a/zotero/agent-harness/cli_anything/zotero/skills/SKILL.md b/zotero/agent-harness/cli_anything/zotero/skills/SKILL.md new file mode 100644 index 000000000..a62cf1af2 --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/skills/SKILL.md @@ -0,0 +1,243 @@ +--- +name: >- + cli-anything-zotero +description: >- + CLI harness for Zotero. +--- + +# cli-anything-zotero + +`cli-anything-zotero` is an agent-native CLI for Zotero desktop. It does not reimplement Zotero. Instead, it composes Zotero's real local surfaces: + +## Installation + +```bash +pip install -e . +``` + +## Entry Points + +```bash +cli-anything-zotero +python -m cli_anything.zotero +``` + +## Command Groups + + +### App + +Application and runtime inspection commands. + +| Command | Description | +|---------|-------------| + +| `status` | Execute `status`. | + +| `version` | Execute `version`. | + +| `launch` | Execute `launch`. | + +| `enable-local-api` | Execute `enable-local-api`. | + +| `ping` | Execute `ping`. | + + + +### Collection + +Collection inspection and selection commands. + +| Command | Description | +|---------|-------------| + +| `list` | Execute `list`. | + +| `find` | Execute `find`. | + +| `tree` | Execute `tree`. | + +| `get` | Execute `get`. | + +| `items` | Execute `items`. | + +| `use-selected` | Execute `use-selected`. | + + + +### Item + +Item inspection and rendering commands. + +| Command | Description | +|---------|-------------| + +| `list` | Execute `list`. | + +| `find` | Execute `find`. | + +| `get` | Execute `get`. | + +| `children` | Execute `children`. | + +| `notes` | Execute `notes`. | + +| `attachments` | Execute `attachments`. 
| + +| `file` | Execute `file`. | + +| `citation` | Execute `citation`. | + +| `bibliography` | Execute `bibliography`. | + +| `context` | Execute `context`. | + +| `analyze` | Execute `analyze`. | + +| `add-to-collection` | Execute `add-to-collection`. | + +| `move-to-collection` | Execute `move-to-collection`. | + + + +### Search + +Saved-search inspection commands. + +| Command | Description | +|---------|-------------| + +| `list` | Execute `list`. | + +| `get` | Execute `get`. | + +| `items` | Execute `items`. | + + + +### Tag + +Tag inspection commands. + +| Command | Description | +|---------|-------------| + +| `list` | Execute `list`. | + +| `items` | Execute `items`. | + + + +### Style + +Installed CSL style inspection commands. + +| Command | Description | +|---------|-------------| + +| `list` | Execute `list`. | + + + +### Import + +Official Zotero import and write commands. + +| Command | Description | +|---------|-------------| + +| `file` | Execute `file`. | + +| `json` | Execute `json`. | + + + +### Note + +Read and add child notes. + +| Command | Description | +|---------|-------------| + +| `get` | Execute `get`. | + + + +### Session + +Session and REPL context commands. + +| Command | Description | +|---------|-------------| + +| `status` | Execute `status`. | + +| `use-library` | Execute `use-library`. | + +| `use-collection` | Execute `use-collection`. | + +| `use-item` | Execute `use-item`. | + +| `use-selected` | Execute `use-selected`. | + +| `clear-library` | Execute `clear-library`. | + +| `clear-collection` | Execute `clear-collection`. | + +| `clear-item` | Execute `clear-item`. | + +| `history` | Execute `history`. | + + + +## Examples + + +### Runtime Status + +Inspect Zotero paths and backend availability. + +```bash +cli-anything-zotero app status --json +``` + + +### Read Selected Collection + +Persist the collection selected in the Zotero GUI. 
+ +```bash +cli-anything-zotero collection use-selected --json +``` + + +### Render Citation + +Render a citation using Zotero's Local API. + +```bash +cli-anything-zotero item citation <item-key> --style apa --locale en-US --json +``` + + +### Add Child Note + +Create a child note under an existing Zotero item. + +```bash +cli-anything-zotero note add <item-key> --text "Key takeaway" --json +``` + + +### Build LLM Context + +Assemble structured context for downstream model analysis. + +```bash +cli-anything-zotero item context <item-key> --include-notes --include-links --json +``` + + +## Version + +0.1.0 \ No newline at end of file diff --git a/zotero/agent-harness/cli_anything/zotero/tests/TEST.md b/zotero/agent-harness/cli_anything/zotero/tests/TEST.md new file mode 100644 index 000000000..174d8532f --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/tests/TEST.md @@ -0,0 +1,307 @@ +# Zotero CLI Harness - Test Documentation + +## Test Inventory + +| File | Focus | Coverage | +|---|---|---| +| `test_core.py` | Path discovery, SQLite inspection, library-aware resolution, note/context/analyze helpers, experimental SQLite writes | Unit + mocked HTTP | +| `test_cli_entrypoint.py` | CLI help, REPL entry, subprocess behavior, fake connector/Local API/OpenAI flows, group-library routing | Installed/subprocess behavior | +| `test_agent_harness.py` | Packaging, harness structure, skill generation | Packaging integrity | +| `test_full_e2e.py` | Real Zotero runtime, safe read workflows, opt-in write flows | Live validation | + +## Unit Test Plan + +### Path Discovery + +- resolve profile root from explicit path and environment +- parse `profiles.ini` +- parse `prefs.js` and `user.js` +- resolve custom `extensions.zotero.dataDir` +- fall back to `~/Zotero` +- resolve executable and version +- detect Local API pref state + +### SQLite Inspection + +- libraries +- collections and collection tree +- title search and collection search +- items, notes, attachments, and 
annotations +- item fields, creators, and tags +- saved searches and conditions +- tag-linked item lookup +- attachment real-path resolution +- duplicate key resolution across user and group libraries + +### Context, Notes, and Analysis + +- `item find` Local API preference and SQLite fallback +- group-library Local API scope selection +- `item notes` and `note get` +- `note add` payload construction for text and markdown +- `item context` aggregation of links, notes, and exports +- `item analyze` OpenAI request path and missing API key errors + +### Experimental SQLite Writes + +- `collection create` +- `item add-to-collection` +- `item move-to-collection` +- backup creation +- transaction commit/rollback behavior +- Zotero-running guard +- local user-library-only restriction + +### Import Core + +- `app enable-local-api` idempotency +- connector-required guard for write commands +- `import file` raw-text handoff +- `import json` parsing and validation +- inline `attachments` extraction and stripping for `import json` +- `--attachments-manifest` parsing and index/title validation for `import file` +- local-file and URL PDF validation, including magic-byte acceptance when `Content-Type` is wrong +- partial-success attachment reporting and non-zero exit semantics +- duplicate attachment skipping within the same import request +- session-target fallback chain +- repeatable tag propagation to `updateSession` + +## CLI / Subprocess Plan + +- root `--help` +- default REPL entry +- REPL help text +- `app status --json` +- `app enable-local-api --json` +- `collection list/find --json` +- `item get/find/notes/context --json` +- `note get/add --json` +- `item analyze --json` against a fake OpenAI-compatible endpoint +- group-library `item find/export/citation/bibliography/search items` routing +- `session use-library L<id>` normalization +- force-installed subprocess resolution via `CLI_ANYTHING_FORCE_INSTALLED=1` +- `import json` with inline local and URL PDF attachments 
+- `import file` with `--attachments-manifest` +- partial-success import attachment failures returning non-zero +- experimental collection write commands against an isolated SQLite copy + +## Live E2E Plan + +### Non-Mutating + +- `app ping` +- `collection use-selected` +- `collection tree/get/items` +- `item list/get/find/attachments/file` +- `item notes` +- `note get` +- `tag list/items` +- `search list/get/items` when saved searches exist +- `session use-collection/use-item` +- `style list` +- `item context` +- `item citation` +- `item bibliography` +- `item export --format ris|bibtex|csljson` + +### Mutating + +- `import file` +- `import json` +- `import json` with inline local PDF attachment +- `note add` + +These write tests are opt-in only and require: + +- `CLI_ANYTHING_ZOTERO_ENABLE_WRITE_E2E=1` +- `CLI_ANYTHING_ZOTERO_IMPORT_TARGET=<collection-key-or-id>` + +Experimental SQLite write commands are intentionally **not** executed against the +real Zotero library. They are tested only against isolated SQLite copies. + +## Test Results + +Validation completed on 2026-03-27. + +### Machine / Runtime + +- OS: Windows +- Python: 3.13.5 +- Zotero executable: `C:\Program Files\Zotero\zotero.exe` +- Zotero version: `7.0.32` +- Active profile: `C:\Users\Lenovo\AppData\Roaming\Zotero\Zotero\Profiles\38ay0ldk.default` +- Active data dir: `D:\Study\科研\论文` +- HTTP port: `23119` +- Local API state during validation: enabled and available + +### Product Validation Commands + +```powershell +py -m pip install -e . 
+py -m pytest cli_anything/zotero/tests/test_core.py -v +py -m pytest cli_anything/zotero/tests/test_cli_entrypoint.py -v +py -m pytest cli_anything/zotero/tests/test_agent_harness.py -v +py -m pytest cli_anything/zotero/tests/test_full_e2e.py -v -s +py -m pytest cli_anything/zotero/tests/ -v --tb=no + +$env:CLI_ANYTHING_FORCE_INSTALLED=1 +py -m pytest cli_anything/zotero/tests/test_cli_entrypoint.py -v +py -m pytest cli_anything/zotero/tests/test_full_e2e.py -v -s + +cli-anything-zotero --json app status +cli-anything-zotero --json collection find "具身" +cli-anything-zotero --json item find "embodied intelligence" --limit 5 +cli-anything-zotero --json item context PB98EI9N --include-links +cli-anything-zotero --json note get <note-key> +``` + +### Real Zotero Results + +- `app status --json` reported: + - `connector_available: true` + - `local_api_available: true` + - `local_api_enabled_configured: true` +- `collection use-selected --json` returned the live GUI selection from the running Zotero window +- `item find` succeeded on a live library item through the Local API search path +- `item context` produced structured item metadata and prompt-ready text on a real library item +- `item notes` and `note get` succeeded when a real item with child notes was available +- `item citation`, `item bibliography`, and `item export` all succeeded on a real regular item +- export validation succeeded: + - RIS contained `TY -` + - BibTeX contained `@` + - CSL JSON parsed successfully + +### Write-Test Policy Result + +- mocked connector write-path tests for `import file`, `import json`, import-time PDF attachments, and `note add` passed +- subprocess tests for the same write paths, including inline and manifest attachment flows, passed against fake local services +- mocked group-library Local API routing passed for `item find`, `item export`, `item citation`, `item bibliography`, and `search items` +- installed-command subprocess checks passed with 
`CLI_ANYTHING_FORCE_INSTALLED=1` +- real write-import, live import-with-attachment, and live note-add E2E remain opt-in by default +- experimental SQLite write commands were validated only on isolated local SQLite copies + +### Pytest Results + +```text +py -m pytest cli_anything/zotero/tests/ -v --tb=no + +============================= test session starts ============================= +platform win32 -- Python 3.13.5, pytest-8.4.2, pluggy-1.6.0 -- C:\Users\Lenovo\AppData\Local\Programs\Python\Python313\python.exe +cachedir: .pytest_cache +rootdir: C:\Users\Lenovo\Desktop\CLI-Anything\zotero\agent-harness +configfile: pyproject.toml +plugins: anyio-4.9.0 +collecting ... collected 82 items + +cli_anything/zotero/tests/test_agent_harness.py::AgentHarnessPackagingTests::test_required_files_exist PASSED [ 1%] +cli_anything/zotero/tests/test_agent_harness.py::AgentHarnessPackagingTests::test_setup_reports_expected_name PASSED [ 2%] +cli_anything/zotero/tests/test_agent_harness.py::AgentHarnessPackagingTests::test_setup_reports_expected_version PASSED [ 3%] +cli_anything/zotero/tests/test_agent_harness.py::AgentHarnessPackagingTests::test_skill_generator_regenerates_skill PASSED [ 4%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_app_enable_local_api_json PASSED [ 6%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_app_status_json PASSED [ 7%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_collection_find_json PASSED [ 8%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_collection_list_json PASSED [ 9%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_default_entrypoint_starts_repl PASSED [ 10%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_dispatch_uses_requested_prog_name PASSED [ 12%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_experimental_collection_write_commands 
PASSED [ 13%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_force_installed_mode_requires_real_command PASSED [ 14%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_group_library_routes_use_group_scope PASSED [ 15%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_help_renders_groups PASSED [ 17%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_import_file_subprocess PASSED [ 18%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_import_file_subprocess_with_attachment_manifest PASSED [ 19%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_import_json_subprocess PASSED [ 20%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_import_json_subprocess_duplicate_attachment_is_idempotent PASSED [ 21%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_import_json_subprocess_partial_success_returns_nonzero PASSED [ 23%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_import_json_subprocess_with_inline_file_attachment PASSED [ 24%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_import_json_subprocess_with_url_attachment PASSED [ 25%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_item_context_and_analyze PASSED [ 26%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_item_find_and_notes_json PASSED [ 28%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_item_get_json PASSED [ 29%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_note_get_and_add PASSED [ 30%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_repl_help_text_mentions_builtins PASSED [ 31%] +cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_session_status_json PASSED [ 32%] 
+cli_anything/zotero/tests/test_cli_entrypoint.py::CliEntrypointTests::test_session_use_library_normalizes_tree_view_library_ref PASSED [ 34%] +cli_anything/zotero/tests/test_core.py::PathDiscoveryTests::test_build_environment_accepts_env_profile_dir_pointing_to_profile PASSED [ 35%] +cli_anything/zotero/tests/test_core.py::PathDiscoveryTests::test_build_environment_falls_back_to_home_zotero PASSED [ 36%] +cli_anything/zotero/tests/test_core.py::PathDiscoveryTests::test_build_environment_uses_active_profile_and_data_dir_pref PASSED [ 37%] +cli_anything/zotero/tests/test_core.py::PathDiscoveryTests::test_ensure_local_api_enabled_writes_user_js PASSED [ 39%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_cross_library_unique_key_still_resolves_without_session_context PASSED [ 40%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_duplicate_key_resolution_requires_library_context PASSED [ 41%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_experimental_sqlite_write_helpers PASSED [ 42%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_fetch_collections_and_tree PASSED [ 43%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_fetch_item_children_and_attachments PASSED [ 45%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_fetch_libraries PASSED [ 46%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_fetch_saved_searches_and_tags PASSED [ 47%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_find_collections_and_items_and_notes PASSED [ 48%] +cli_anything/zotero/tests/test_core.py::SQLiteInspectionTests::test_resolve_item_includes_fields_creators_tags PASSED [ 50%] +cli_anything/zotero/tests/test_core.py::SessionTests::test_expand_repl_aliases PASSED [ 51%] +cli_anything/zotero/tests/test_core.py::SessionTests::test_normalize_library_ref_accepts_plain_and_tree_view_ids PASSED [ 52%] 
+cli_anything/zotero/tests/test_core.py::SessionTests::test_save_and_load_session_state PASSED [ 53%] +cli_anything/zotero/tests/test_core.py::HttpUtilityTests::test_build_runtime_context_reports_unavailable_services PASSED [ 54%] +cli_anything/zotero/tests/test_core.py::HttpUtilityTests::test_catalog_style_list_parses_csl PASSED [ 56%] +cli_anything/zotero/tests/test_core.py::HttpUtilityTests::test_wait_for_endpoint_requires_explicit_ready_status PASSED [ 57%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_enable_local_api_reports_idempotent_state PASSED [ 58%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_file_manifest_index_out_of_range_and_missing_connector_id_fail_cleanly PASSED [ 59%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_file_manifest_partial_success_records_attachment_failures PASSED [ 60%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_file_manifest_title_mismatch_marks_attachment_failure PASSED [ 62%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_file_posts_raw_text_and_explicit_tree_view_target PASSED [ 63%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_json_duplicate_inline_attachments_are_skipped PASSED [ 64%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_json_rejects_invalid_inline_attachment_schema PASSED [ 65%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_json_rejects_invalid_json PASSED [ 67%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_json_strips_inline_attachments_and_uploads_local_pdf PASSED [ 68%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_json_url_attachment_uses_delay_and_default_timeout PASSED [ 69%] +cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_json_uses_session_collection_and_tags PASSED [ 70%] 
+cli_anything/zotero/tests/test_core.py::ImportCoreTests::test_import_requires_connector PASSED [ 71%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_collection_find_and_item_find_sqlite_fallback PASSED [ 73%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_collection_scoped_item_find_prefers_local_api PASSED [ 74%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_experimental_commands_require_closed_zotero_and_update_db_copy PASSED [ 75%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_group_library_local_api_scope_and_search_routes PASSED [ 76%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_item_analyze_requires_api_key_and_uses_openai PASSED [ 78%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_item_context_aggregates_exports_and_links PASSED [ 79%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_item_notes_and_note_get PASSED [ 80%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_note_add_builds_child_note_payload PASSED [ 81%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_rendering_uses_group_library_local_api_scope PASSED [ 82%] +cli_anything/zotero/tests/test_core.py::OpenAIUtilityTests::test_extract_text_from_response_payload PASSED [ 84%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_attachment_inventory_commands PASSED [ 85%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_collection_detail_commands PASSED [ 86%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_collection_use_selected PASSED [ 87%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_connector_ping PASSED [ 89%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_item_citation_bibliography_and_exports PASSED [ 90%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_item_find_and_context_commands PASSED [ 91%] 
+cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_note_inventory_commands PASSED [ 92%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_opt_in_import_json_with_inline_attachment SKIPPED [ 93%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_opt_in_note_add_command SKIPPED [ 95%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_opt_in_write_import_commands SKIPPED [ 96%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_search_detail_commands SKIPPED [ 97%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_sqlite_inventory_commands PASSED [ 98%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_tag_and_session_commands PASSED [100%] + +================== 78 passed, 4 skipped in 108.48s (0:01:48) ================== +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_item_notes_and_note_get PASSED [ 78%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_note_add_builds_child_note_payload PASSED [ 79%] +cli_anything/zotero/tests/test_core.py::WorkflowCoreTests::test_rendering_uses_group_library_local_api_scope PASSED [ 81%] +cli_anything/zotero/tests/test_core.py::OpenAIUtilityTests::test_extract_text_from_response_payload PASSED [ 82%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_attachment_inventory_commands PASSED [ 84%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_collection_detail_commands PASSED [ 85%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_collection_use_selected PASSED [ 86%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_connector_ping PASSED [ 88%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_item_citation_bibliography_and_exports PASSED [ 89%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_item_find_and_context_commands PASSED [ 91%] 
+cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_note_inventory_commands PASSED [ 92%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_opt_in_note_add_command SKIPPED [ 94%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_opt_in_write_import_commands SKIPPED [ 95%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_search_detail_commands SKIPPED [ 97%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_sqlite_inventory_commands PASSED [ 98%] +cli_anything/zotero/tests/test_full_e2e.py::ZoteroFullE2E::test_tag_and_session_commands PASSED [100%] + +================== 66 passed, 3 skipped in 87.81s (0:01:27) =================== +``` + +### Notes + +- SQLite inspection uses a read-only immutable connection so local reads continue to work while Zotero is open. +- bare key lookup is library-aware: unique keys resolve automatically, while duplicate keys require `session use-library <id>`. +- stable Local API read/export routes are validated for both `/api/users/0/...` and `/api/groups/<libraryID>/...`. +- experimental collection write commands require Zotero to be closed, require `--experimental`, and create a timestamped backup before each write. +- `item context` is the recommended model-independent AI interface. +- `item analyze` is covered by mocked OpenAI-compatible subprocess tests, not by live external API calls. 
diff --git a/zotero/agent-harness/cli_anything/zotero/tests/__init__.py b/zotero/agent-harness/cli_anything/zotero/tests/__init__.py new file mode 100644 index 000000000..c00067354 --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for cli-anything-zotero.""" diff --git a/zotero/agent-harness/cli_anything/zotero/tests/_helpers.py b/zotero/agent-harness/cli_anything/zotero/tests/_helpers.py new file mode 100644 index 000000000..6161b722e --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/tests/_helpers.py @@ -0,0 +1,705 @@ +from __future__ import annotations + +import json +import re +import sqlite3 +import threading +from contextlib import closing, contextmanager +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path +from urllib.parse import parse_qs, unquote, urlparse + + +def sample_pdf_bytes(label: str = "sample") -> bytes: + body = f"""%PDF-1.4 +1 0 obj +<< /Type /Catalog /Pages 2 0 R >> +endobj +2 0 obj +<< /Type /Pages /Count 1 /Kids [3 0 R] >> +endobj +3 0 obj +<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] /Contents 4 0 R >> +endobj +4 0 obj +<< /Length 44 >> +stream +BT /F1 12 Tf 32 120 Td ({label}) Tj ET +endstream +endobj +trailer +<< /Root 1 0 R >> +%%EOF +""" + return body.encode("utf-8") + + +def create_sample_environment(base: Path) -> dict[str, Path]: + profile_root = base / "AppData" / "Roaming" / "Zotero" / "Zotero" + profile_dir = profile_root / "Profiles" / "test.default" + data_dir = base / "ZoteroData" + install_dir = base / "Program Files" / "Zotero" + storage_dir = data_dir / "storage" / "ATTACHKEY" + styles_dir = data_dir / "styles" + translators_dir = data_dir / "translators" + + profile_dir.mkdir(parents=True, exist_ok=True) + storage_dir.mkdir(parents=True, exist_ok=True) + styles_dir.mkdir(parents=True, exist_ok=True) + translators_dir.mkdir(parents=True, exist_ok=True) + install_dir.mkdir(parents=True, exist_ok=True) + + 
profiles_ini = """[Profile0] +Name=default +IsRelative=1 +Path=Profiles/test.default +Default=1 + +[General] +StartWithLastProfile=1 +Version=2 +""" + (profile_root / "profiles.ini").write_text(profiles_ini, encoding="utf-8") + + data_dir_pref = str(data_dir).replace("\\", "\\\\") + prefs_js = ( + 'user_pref("extensions.zotero.useDataDir", true);\n' + f'user_pref("extensions.zotero.dataDir", "{data_dir_pref}");\n' + 'user_pref("extensions.zotero.httpServer.port", 23119);\n' + 'user_pref("extensions.zotero.httpServer.localAPI.enabled", false);\n' + ) + (profile_dir / "prefs.js").write_text(prefs_js, encoding="utf-8") + + application_ini = """[App] +Vendor=Zotero +Name=Zotero +Version=7.0.32 +BuildID=20260114201345 +""" + (install_dir / "app").mkdir(exist_ok=True) + (install_dir / "app" / "application.ini").write_text(application_ini, encoding="utf-8") + (install_dir / "zotero.exe").write_text("", encoding="utf-8") + + sqlite_path = data_dir / "zotero.sqlite" + conn = sqlite3.connect(sqlite_path) + try: + cur = conn.cursor() + cur.executescript( + """ + CREATE TABLE libraries (libraryID INTEGER PRIMARY KEY, type TEXT, editable INTEGER, filesEditable INTEGER, version INTEGER, storageVersion INTEGER, lastSync INTEGER, archived INTEGER); + CREATE TABLE itemTypes (itemTypeID INTEGER PRIMARY KEY, typeName TEXT, templateItemTypeID INTEGER, display INTEGER); + CREATE TABLE items (itemID INTEGER PRIMARY KEY, itemTypeID INTEGER, dateAdded TEXT, dateModified TEXT, clientDateModified TEXT, libraryID INTEGER, key TEXT, version INTEGER, synced INTEGER); + CREATE TABLE fields (fieldID INTEGER PRIMARY KEY, fieldName TEXT, fieldFormatID INTEGER); + CREATE TABLE itemDataValues (valueID INTEGER PRIMARY KEY, value TEXT); + CREATE TABLE itemData (itemID INTEGER, fieldID INTEGER, valueID INTEGER); + CREATE TABLE creators (creatorID INTEGER PRIMARY KEY, firstName TEXT, lastName TEXT, fieldMode INTEGER); + CREATE TABLE itemCreators (itemID INTEGER, creatorID INTEGER, creatorTypeID INTEGER, 
orderIndex INTEGER); + CREATE TABLE tags (tagID INTEGER PRIMARY KEY, name TEXT); + CREATE TABLE itemTags (itemID INTEGER, tagID INTEGER, type INTEGER); + CREATE TABLE collections (collectionID INTEGER PRIMARY KEY, collectionName TEXT, parentCollectionID INTEGER, clientDateModified TEXT, libraryID INTEGER, key TEXT, version INTEGER, synced INTEGER); + CREATE TABLE collectionItems (collectionID INTEGER, itemID INTEGER, orderIndex INTEGER); + CREATE TABLE itemNotes (itemID INTEGER PRIMARY KEY, parentItemID INTEGER, note TEXT, title TEXT); + CREATE TABLE itemAttachments (itemID INTEGER PRIMARY KEY, parentItemID INTEGER, linkMode INTEGER, contentType TEXT, charsetID INTEGER, path TEXT, syncState INTEGER, storageModTime INTEGER, storageHash TEXT, lastProcessedModificationTime INTEGER); + CREATE TABLE itemAnnotations (itemID INTEGER PRIMARY KEY, parentItemID INTEGER, type INTEGER, authorName TEXT, text TEXT, comment TEXT, color TEXT, pageLabel TEXT, sortIndex TEXT, position TEXT, isExternal INTEGER); + CREATE TABLE savedSearches (savedSearchID INTEGER PRIMARY KEY, savedSearchName TEXT, clientDateModified TEXT, libraryID INTEGER, key TEXT, version INTEGER, synced INTEGER); + CREATE TABLE savedSearchConditions (savedSearchID INTEGER, searchConditionID INTEGER, condition TEXT, operator TEXT, value TEXT, required INTEGER); + CREATE UNIQUE INDEX items_library_key ON items(libraryID, key); + CREATE UNIQUE INDEX collections_library_key ON collections(libraryID, key); + CREATE UNIQUE INDEX saved_searches_library_key ON savedSearches(libraryID, key); + """ + ) + cur.executemany( + "INSERT INTO libraries VALUES (?, ?, 1, 1, 1, 1, 0, 0)", + [(1, "user"), (2, "group")], + ) + cur.executemany( + "INSERT INTO itemTypes VALUES (?, ?, NULL, 1)", + [(1, "journalArticle"), (2, "attachment"), (3, "note")], + ) + cur.executemany( + "INSERT INTO items VALUES (?, ?, '2026-01-01', '2026-01-02', '2026-01-02', ?, ?, 1, 1)", + [ + (1, 1, 1, "REG12345"), + (2, 2, 1, "ATTACHKEY"), + (3, 3, 1, 
"NOTEKEY"), + (4, 1, 1, "REG67890"), + (5, 1, 2, "GROUPKEY"), + (6, 1, 1, "DUPITEM1"), + (7, 1, 2, "DUPITEM1"), + (8, 2, 1, "LINKATT1"), + ], + ) + cur.executemany("INSERT INTO fields VALUES (?, ?, 0)", [(1, "title"), (2, "DOI"), (3, "url")]) + cur.executemany( + "INSERT INTO itemDataValues VALUES (?, ?)", + [ + (1, "Sample Title"), + (2, "Second Item"), + (3, "10.1000/sample"), + (4, "https://example.com/paper"), + (5, "Group Title"), + (6, "User Duplicate Title"), + (7, "Group Duplicate Title"), + ], + ) + cur.executemany( + "INSERT INTO itemData VALUES (?, ?, ?)", + [(1, 1, 1), (4, 1, 2), (1, 2, 3), (1, 3, 4), (5, 1, 5), (6, 1, 6), (7, 1, 7)], + ) + cur.executemany( + "INSERT INTO creators VALUES (?, ?, ?, 0)", + [(1, "Ada", "Lovelace"), (2, "Grace", "Hopper")], + ) + cur.executemany("INSERT INTO itemCreators VALUES (?, ?, 1, 0)", [(1, 1), (5, 2)]) + cur.executemany("INSERT INTO tags VALUES (?, ?)", [(1, "sample-tag"), (2, "group-tag")]) + cur.executemany("INSERT INTO itemTags VALUES (?, ?, 0)", [(1, 1), (4, 1), (5, 2)]) + cur.executemany( + "INSERT INTO collections VALUES (?, ?, ?, '2026-01-02', ?, ?, 1, 1)", + [ + (1, "Sample Collection", None, 1, "COLLAAAA"), + (2, "Archive Collection", None, 1, "COLLBBBB"), + (3, "Nested Collection", 1, 1, "COLLCCCC"), + (4, "User Duplicate Collection", None, 1, "DUPCOLL1"), + (10, "Group Collection", None, 2, "GCOLLAAA"), + (11, "Group Duplicate Collection", None, 2, "DUPCOLL1"), + ], + ) + cur.executemany( + "INSERT INTO collectionItems VALUES (?, ?, ?)", + [(1, 1, 0), (1, 4, 1), (2, 4, 0), (4, 6, 0), (10, 5, 0), (11, 7, 0)], + ) + cur.execute("INSERT INTO itemNotes VALUES (3, 1, '<div>Example note</div>', 'Example note')") + cur.execute( + "INSERT INTO itemAttachments VALUES (2, 1, 0, 'application/pdf', NULL, 'storage:paper.pdf', 0, 0, '', 0)" + ) + cur.execute( + "INSERT INTO itemAttachments VALUES (8, 4, 2, 'application/pdf', NULL, 'file:///C:/Users/Public/linked.pdf', 0, 0, '', 0)" + ) + cur.executemany( + "INSERT INTO 
def _next_id(conn: sqlite3.Connection, table: str, column: str) -> int:
    """Return ``MAX(column) + 1`` for ``table`` (``1`` when the table is empty).

    ``table`` and ``column`` are interpolated into the SQL text because
    identifiers cannot be bound as parameters; pass trusted literal names only.
    """
    row = conn.execute(f"SELECT COALESCE(MAX({column}), 0) + 1 AS next_id FROM {table}").fetchone()
    assert row is not None
    # Positional access works for plain tuple rows as well as sqlite3.Row, so the
    # helper does not depend on the caller having configured a row factory.
    return int(row[0])


def _item_type_id(conn: sqlite3.Connection, type_name: str) -> int:
    """Return the ``itemTypeID`` for ``type_name``.

    Unknown type names fall back to the ``journalArticle`` row, which must exist.
    """
    row = conn.execute("SELECT itemTypeID FROM itemTypes WHERE typeName = ?", (type_name,)).fetchone()
    if row:
        return int(row[0])
    fallback = conn.execute("SELECT itemTypeID FROM itemTypes WHERE typeName = 'journalArticle'").fetchone()
    assert fallback is not None
    return int(fallback[0])


def _field_id(conn: sqlite3.Connection, field_name: str) -> int:
    """Return the ``fieldID`` for ``field_name``, inserting a new ``fields`` row if it is missing.

    The insert is not committed here; the caller owns the transaction.
    """
    row = conn.execute("SELECT fieldID FROM fields WHERE fieldName = ?", (field_name,)).fetchone()
    if row:
        return int(row[0])
    field_id = _next_id(conn, "fields", "fieldID")
    conn.execute("INSERT INTO fields VALUES (?, ?, 0)", (field_id, field_name))
    return field_id


def _set_item_field(conn: sqlite3.Connection, item_id: int, field_name: str, value: str) -> None:
    """Attach ``value`` to ``item_id`` under ``field_name``.

    A fresh ``itemDataValues`` row is always created and linked through
    ``itemData``; nothing is committed here.
    """
    value_id = _next_id(conn, "itemDataValues", "valueID")
    conn.execute("INSERT INTO itemDataValues VALUES (?, ?)", (value_id, value))
    conn.execute("INSERT INTO itemData VALUES (?, ?, ?)", (item_id, _field_id(conn, field_name), value_id))


def _item_key(prefix: str, item_id: int) -> str:
    """Build a deterministic item key: ``prefix`` followed by the id zero-padded to five digits."""
    return f"{prefix}{item_id:05d}"


def _safe_pdf_filename(source_url: str) -> str:
    """Derive a filesystem-safe ``*.pdf`` file name from the path component of ``source_url``.

    Characters outside ``[A-Za-z0-9._-]`` collapse to ``-``; an empty result
    becomes ``attachment.pdf`` and a missing ``.pdf`` suffix is appended.
    """
    parsed = urlparse(source_url)
    candidate = Path(unquote(parsed.path or "")).name or "attachment.pdf"
    candidate = re.sub(r"[^A-Za-z0-9._-]+", "-", candidate).strip("-") or "attachment.pdf"
    if not candidate.lower().endswith(".pdf"):
        candidate += ".pdf"
    return candidate


def _split_ris_records(content: str) -> list[str]:
    """Split RIS text into records, each ending at its ``ER`` (end-of-record) line.

    The tag separator is matched with any run of spaces/tabs before the dash, so
    both the canonical ``ER  - `` form and a single-space ``ER - `` are accepted.
    Trailing lines without an ``ER`` tag form a final record. When nothing
    non-blank is found the original ``content`` is returned as the only element.
    """
    records: list[str] = []
    current: list[str] = []
    for line in content.splitlines():
        current.append(line)
        if re.match(r"ER[ \t]+-", line):
            record = "\n".join(current).strip()
            if record:
                records.append(record)
            current = []
    if current:
        record = "\n".join(current).strip()
        if record:
            records.append(record)
    return records or [content]


def _ris_title(record: str) -> str:
    """Return the value of the first ``TI`` line in ``record``, or ``"Imported Sample"`` if none.

    The separator between the tag and the dash may be any run of spaces/tabs.
    """
    match = re.search(r"(?m)^TI[ \t]+-[ \t]*(.+)$", record)
    return match.group(1).strip() if match else "Imported Sample"
@contextmanager
def fake_zotero_http_server(
    *,
    local_api_root_status: int = 200,
    sqlite_path: Path | str | None = None,
    data_dir: Path | str | None = None,
):
    """Run an in-process fake of Zotero's local HTTP endpoints for tests.

    The server listens on ``127.0.0.1`` on an ephemeral port in a daemon thread
    and is shut down when the context exits.

    Args:
        local_api_root_status: Status returned for ``/api/`` GET paths that have
            no canned response.
        sqlite_path: Optional sample database. When given, connector writes
            (items, notes, attachments, collection/tag updates) are persisted
            to it; when omitted, writes are only recorded in memory.
        data_dir: Optional Zotero data directory; attachment bytes are stored
            under ``<data_dir>/storage/<key>/`` when both it and ``sqlite_path``
            are provided.

    Yields:
        A dict with ``port`` (bound port), ``calls`` (list of recorded request
        dicts) and ``sessions`` (connector session state keyed by session id).
    """
    calls: list[dict[str, object]] = []
    sqlite_file = Path(sqlite_path) if sqlite_path is not None else None
    zotero_data_dir = Path(data_dir) if data_dir is not None else None
    sessions: dict[str, dict[str, object]] = {}

    def db_connect() -> sqlite3.Connection:
        """Open the sample database with name-addressable rows."""
        if sqlite_file is None:
            raise RuntimeError("sqlite_path is required for this fake server operation")
        conn = sqlite3.connect(sqlite_file)
        conn.row_factory = sqlite3.Row
        return conn

    def create_top_level_item(
        item_payload: dict[str, object],
        *,
        connector_id: str,
        library_id: int = 1,
    ) -> dict[str, object]:
        """Record a regular item; without a database only the connector id is echoed back."""
        if sqlite_file is None:
            return {"connector_id": connector_id}
        with closing(db_connect()) as conn:
            item_id = _next_id(conn, "items", "itemID")
            key = _item_key("IMP", item_id)
            item_type = str(item_payload.get("itemType") or "journalArticle")
            title = str(item_payload.get("title") or item_payload.get("bookTitle") or item_payload.get("publicationTitle") or "")
            item_type_id = _item_type_id(conn, item_type)
            conn.execute(
                "INSERT INTO items VALUES (?, ?, '2026-03-27', '2026-03-27', '2026-03-27', ?, ?, 1, 1)",
                (item_id, item_type_id, library_id, key),
            )
            if title:
                _set_item_field(conn, item_id, "title", title)
            conn.commit()
        return {
            "connector_id": connector_id,
            "itemID": item_id,
            "key": key,
            "title": title,
            "libraryID": library_id,
            "itemType": item_type,
        }

    def create_note_item(item_payload: dict[str, object], *, connector_id: str) -> dict[str, object]:
        """Record a child note under the item whose key is ``item_payload["parentItem"]``.

        Raises:
            RuntimeError: If no item with that key exists in the database.
        """
        if sqlite_file is None:
            return {"connector_id": connector_id}
        parent_key = str(item_payload.get("parentItem") or "")
        note_html = str(item_payload.get("note") or "")
        with closing(db_connect()) as conn:
            parent = conn.execute("SELECT itemID, libraryID FROM items WHERE key = ?", (parent_key,)).fetchone()
            if parent is None:
                raise RuntimeError(f"Unknown parent item for note: {parent_key}")
            item_id = _next_id(conn, "items", "itemID")
            key = _item_key("NOT", item_id)
            conn.execute(
                "INSERT INTO items VALUES (?, ?, '2026-03-27', '2026-03-27', '2026-03-27', ?, ?, 1, 1)",
                (item_id, _item_type_id(conn, "note"), int(parent["libraryID"]), key),
            )
            conn.execute(
                "INSERT INTO itemNotes VALUES (?, ?, ?, ?)",
                (item_id, int(parent["itemID"]), note_html, "Imported note"),
            )
            conn.commit()
        return {
            "connector_id": connector_id,
            "itemID": item_id,
            "key": key,
            "title": "Imported note",
            "libraryID": int(parent["libraryID"]),
            "itemType": "note",
        }

    def create_attachment_item(*, parent_item_id: int, title: str, source_url: str, content: bytes) -> dict[str, object]:
        """Store ``content`` as a PDF attachment of ``parent_item_id``.

        Without both a database and a data directory nothing is written and the
        title/url are echoed back.

        Raises:
            RuntimeError: If ``parent_item_id`` is not present in the database.
        """
        if sqlite_file is None or zotero_data_dir is None:
            return {"title": title, "url": source_url}
        with closing(db_connect()) as conn:
            parent = conn.execute("SELECT libraryID FROM items WHERE itemID = ?", (parent_item_id,)).fetchone()
            if parent is None:
                raise RuntimeError(f"Unknown parent item id: {parent_item_id}")
            attachment_id = _next_id(conn, "items", "itemID")
            attachment_key = _item_key("ATT", attachment_id)
            filename = _safe_pdf_filename(source_url)
            storage_dir = zotero_data_dir / "storage" / attachment_key
            storage_dir.mkdir(parents=True, exist_ok=True)
            (storage_dir / filename).write_bytes(content)
            conn.execute(
                "INSERT INTO items VALUES (?, ?, '2026-03-27', '2026-03-27', '2026-03-27', ?, ?, 1, 1)",
                (attachment_id, _item_type_id(conn, "attachment"), int(parent["libraryID"]), attachment_key),
            )
            _set_item_field(conn, attachment_id, "title", title)
            _set_item_field(conn, attachment_id, "url", source_url)
            conn.execute(
                "INSERT INTO itemAttachments VALUES (?, ?, 1, 'application/pdf', NULL, ?, 0, 0, '', 0)",
                (attachment_id, parent_item_id, f"storage:{filename}"),
            )
            conn.commit()
        return {
            "itemID": attachment_id,
            "key": attachment_key,
            "path": str(storage_dir / filename),
        }

    def apply_session_update(session_id: str, target: str, tags_text: str) -> None:
        """Apply a connector ``updateSession``: file the session's items and tag them.

        ``target`` of the form ``C<digits>`` selects a collection id; any other
        value leaves collections untouched. ``tags_text`` is a comma-separated
        list. Existing collection/tag links are not duplicated. No-op without a
        database, for an unknown session, or when no item has an ``itemID``.
        """
        if sqlite_file is None:
            return
        session = sessions.get(session_id)
        if not session:
            return
        item_ids = [entry["itemID"] for entry in session["items"].values() if entry.get("itemID")]
        if not item_ids:
            return
        collection_id: int | None = None
        if target.startswith("C") and target[1:].isdigit():
            collection_id = int(target[1:])
        tags = [tag.strip() for tag in tags_text.split(",") if tag.strip()]
        with closing(db_connect()) as conn:
            if collection_id is not None:
                order_index = int(
                    conn.execute(
                        "SELECT COALESCE(MAX(orderIndex), -1) + 1 AS next_order FROM collectionItems WHERE collectionID = ?",
                        (collection_id,),
                    ).fetchone()["next_order"]
                )
                for item_id in item_ids:
                    exists = conn.execute(
                        "SELECT 1 FROM collectionItems WHERE collectionID = ? AND itemID = ?",
                        (collection_id, item_id),
                    ).fetchone()
                    if exists is None:
                        conn.execute("INSERT INTO collectionItems VALUES (?, ?, ?)", (collection_id, item_id, order_index))
                        order_index += 1
            for tag in tags:
                row = conn.execute("SELECT tagID FROM tags WHERE name = ?", (tag,)).fetchone()
                if row is None:
                    tag_id = _next_id(conn, "tags", "tagID")
                    conn.execute("INSERT INTO tags VALUES (?, ?)", (tag_id, tag))
                else:
                    tag_id = int(row["tagID"])
                for item_id in item_ids:
                    exists = conn.execute(
                        "SELECT 1 FROM itemTags WHERE itemID = ? AND tagID = ?",
                        (item_id, tag_id),
                    ).fetchone()
                    if exists is None:
                        conn.execute("INSERT INTO itemTags VALUES (?, ?, 0)", (item_id, tag_id))
            conn.commit()

    class Handler(BaseHTTPRequestHandler):
        """Request handler serving canned GET responses and stateful connector POSTs."""

        def _json_response(self, status: int, payload) -> None:
            """Send ``payload`` serialized as JSON with the given status."""
            body = json.dumps(payload).encode("utf-8")
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def _binary_response(self, status: int, payload: bytes, *, content_type: str) -> None:
            """Send raw bytes with an explicit content type."""
            self.send_response(status)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        def _text_response(self, status: int, payload: str) -> None:
            """Send UTF-8 encoded plain text."""
            body = payload.encode("utf-8")
            self.send_response(status)
            self.send_header("Content-Type", "text/plain; charset=utf-8")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format, *args):  # noqa: A003
            # Silence the default per-request logging.
            return

        def _item_response(self, item_key: str, query: dict[str, list[str]]) -> None:
            """Answer a single-item GET according to the ``format``/``include`` query values."""
            fmt = query.get("format", [""])[0]
            include = query.get("include", [""])[0]
            if fmt == "json" and include == "citation":
                self._json_response(200, {"citation": f"({item_key} citation)"})
                return
            if fmt == "json" and include == "bib":
                self._json_response(200, {"bib": f"{item_key} bibliography"})
                return
            if fmt == "ris":
                self._text_response(200, f"TY - JOUR\nID - {item_key}\nER - \n")
                return
            if fmt == "bibtex":
                self._text_response(200, f"@article{{{item_key.lower()}}}\n")
                return
            if fmt == "csljson":
                self._text_response(200, json.dumps([{"id": item_key}], ensure_ascii=False))
                return
            self._json_response(200, {"key": item_key})

        def do_GET(self):  # noqa: N802
            """Record the call and serve the canned response for the path.

            Checks run in order, so the more specific ``.../items/top`` routes
            are matched before the single-item prefixes.
            """
            calls.append({"method": "GET", "path": self.path})
            parsed = urlparse(self.path)
            path = parsed.path
            query = parse_qs(parsed.query)
            if path.startswith("/connector/ping"):
                self.send_response(200)
                self.send_header("Content-Length", "0")
                self.end_headers()
                return
            if path == "/downloads/sample.pdf":
                self._binary_response(200, sample_pdf_bytes("download"), content_type="application/pdf")
                return
            if path == "/downloads/wrong-content-type.pdf":
                self._binary_response(200, sample_pdf_bytes("download"), content_type="text/plain")
                return
            if path == "/downloads/not-pdf":
                self._binary_response(200, b"not-a-pdf", content_type="text/plain")
                return
            if path == "/downloads/missing.pdf":
                self._text_response(404, "missing")
                return
            if path.startswith("/api/users/0/items/top"):
                self._json_response(
                    200,
                    [
                        {
                            "key": "REG12345",
                            "data": {
                                "title": "Sample Title",
                            },
                        }
                    ],
                )
                return
            if path.startswith("/api/users/0/collections/COLLAAAA/items/top"):
                self._json_response(
                    200,
                    [
                        {
                            "key": "REG12345",
                            "data": {
                                "title": "Sample Title",
                            },
                        }
                    ],
                )
                return
            if path.startswith("/api/groups/2/items/top"):
                self._json_response(200, [{"key": "GROUPKEY", "data": {"title": "Group Title"}}])
                return
            if path.startswith("/api/groups/2/collections/GCOLLAAA/items/top"):
                self._json_response(200, [{"key": "GROUPKEY", "data": {"title": "Group Title"}}])
                return
            if path.startswith("/api/groups/2/searches/GSEARCHKEY/items"):
                self._json_response(200, [{"key": "GROUPKEY"}])
                return
            if path.startswith("/api/users/0/searches/SEARCHKEY/items"):
                self._json_response(200, [{"key": "REG12345"}])
                return
            if path.startswith("/api/users/0/items/REG12345"):
                self._item_response("REG12345", query)
                return
            if path.startswith("/api/groups/2/items/GROUPKEY"):
                self._item_response("GROUPKEY", query)
                return
            if path.startswith("/api/"):
                self.send_response(local_api_root_status)
                self.send_header("Content-Length", "0")
                self.end_headers()
                return
            self.send_response(404)
            self.end_headers()

        def do_POST(self):  # noqa: N802
            """Record the call (body and ``X-Metadata``) and emulate the connector endpoint."""
            length = int(self.headers.get("Content-Length", "0"))
            body = self.rfile.read(length)
            decoded_body = body.decode("utf-8", errors="replace")
            metadata_header = self.headers.get("X-Metadata")
            call = {
                "method": "POST",
                "path": self.path,
                "body": decoded_body,
            }
            if metadata_header:
                try:
                    call["metadata"] = json.loads(metadata_header)
                except json.JSONDecodeError:
                    # Keep the raw header so tests can still inspect what was sent.
                    call["metadata"] = metadata_header
            if self.path.startswith("/connector/saveAttachment"):
                call["body_length"] = len(body)
                call["content_type"] = self.headers.get("Content-Type")
            calls.append(call)

            if self.path.startswith("/connector/getSelectedCollection"):
                self._json_response(
                    200,
                    {
                        "libraryID": 1,
                        "libraryName": "My Library",
                        "libraryEditable": True,
                        "filesEditable": True,
                        "editable": True,
                        "id": 1,
                        "name": "Sample Collection",
                        "targets": [{"id": "L1", "name": "My Library", "filesEditable": True, "level": 0}],
                    },
                )
                return

            if self.path.startswith("/connector/import"):
                parsed = urlparse(self.path)
                session_id = parse_qs(parsed.query).get("session", [""])[0]
                sessions.setdefault(session_id, {"items": {}})
                imported_items: list[dict[str, object]] = []
                for index, record in enumerate(_split_ris_records(decoded_body), start=1):
                    connector_id = f"imported-{index}"
                    title = _ris_title(record)
                    item_info = create_top_level_item(
                        {
                            "itemType": "journalArticle",
                            "title": title,
                        },
                        connector_id=connector_id,
                    )
                    sessions[session_id]["items"][connector_id] = item_info
                    imported_items.append(
                        {
                            "id": connector_id,
                            "itemType": "journalArticle",
                            "title": title,
                        }
                    )
                self._json_response(201, imported_items)
                return

            if self.path.startswith("/connector/saveItems"):
                payload = json.loads(decoded_body or "{}")
                session_id = str(payload.get("sessionID") or "")
                sessions.setdefault(session_id, {"items": {}})
                for item in payload.get("items", []):
                    connector_id = str(item.get("id") or f"connector-{len(sessions[session_id]['items']) + 1}")
                    if str(item.get("itemType") or "") == "note" and item.get("parentItem"):
                        item_info = create_note_item(item, connector_id=connector_id)
                    else:
                        item_info = create_top_level_item(item, connector_id=connector_id)
                    sessions[session_id]["items"][connector_id] = item_info
                self.send_response(201)
                self.send_header("Content-Length", "0")
                self.end_headers()
                return

            if self.path.startswith("/connector/updateSession"):
                payload = json.loads(decoded_body or "{}")
                apply_session_update(
                    str(payload.get("sessionID") or ""),
                    str(payload.get("target") or ""),
                    str(payload.get("tags") or ""),
                )
                self._json_response(200, {})
                return

            if self.path.startswith("/connector/saveAttachment"):
                try:
                    metadata = json.loads(metadata_header or "{}")
                except json.JSONDecodeError:
                    self._json_response(400, {"error": "invalid metadata"})
                    return
                session_id = str(metadata.get("sessionID") or "")
                parent_connector_id = str(metadata.get("parentItemID") or "")
                session = sessions.get(session_id)
                if session is None:
                    self._json_response(400, {"error": "unknown session"})
                    return
                parent = session["items"].get(parent_connector_id)
                if parent is None:
                    self._json_response(400, {"error": "unknown parent connector id"})
                    return
                try:
                    attachment = create_attachment_item(
                        # Session entries created without a database carry no
                        # "itemID"; fall back to 0 so the in-memory branch of
                        # create_attachment_item is reached instead of a KeyError.
                        parent_item_id=int(parent.get("itemID") or 0),
                        title=str(metadata.get("title") or "PDF"),
                        source_url=str(metadata.get("url") or ""),
                        content=body,
                    )
                except RuntimeError as exc:
                    self._json_response(400, {"error": str(exc)})
                    return
                self._json_response(201, attachment)
                return

            if self.path.startswith("/v1/responses"):
                self._json_response(
                    200,
                    {
                        "id": "resp_fake",
                        "output": [
                            {
                                "type": "message",
                                "content": [
                                    {
                                        "type": "output_text",
                                        "text": "Analysis text",
                                    }
                                ],
                            }
                        ],
                    },
                )
                return

            self.send_response(404)
            self.end_headers()

    # Port 0 asks the OS for a free ephemeral port; the real one is reported to the caller.
    server = ThreadingHTTPServer(("127.0.0.1", 0), Handler)
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        yield {"port": server.server_address[1], "calls": calls, "sessions": sessions}
    finally:
        server.shutdown()
        server.server_close()
        thread.join(timeout=5)
class AgentHarnessPackagingTests(unittest.TestCase):
    """Packaging smoke tests run against the harness checkout rooted at ``HARNESS_ROOT``."""

    def _run_harness_script(self, script_name, *script_args):
        """Run a script from the harness root with the current interpreter, capturing text output."""
        command = [sys.executable, str(HARNESS_ROOT / script_name), *script_args]
        return subprocess.run(command, cwd=HARNESS_ROOT, capture_output=True, text=True)

    def test_required_files_exist(self):
        # Every file the harness layout promises must be a regular file.
        package_dir = HARNESS_ROOT / "cli_anything" / "zotero"
        expected_files = (
            HARNESS_ROOT / "setup.py",
            HARNESS_ROOT / "pyproject.toml",
            HARNESS_ROOT / "ZOTERO.md",
            HARNESS_ROOT / "skill_generator.py",
            HARNESS_ROOT / "templates" / "SKILL.md.template",
            package_dir / "README.md",
            package_dir / "zotero_cli.py",
            package_dir / "utils" / "repl_skin.py",
            package_dir / "skills" / "SKILL.md",
            package_dir / "tests" / "TEST.md",
        )
        for expected in expected_files:
            self.assertTrue(expected.is_file(), msg=f"missing required file: {expected}")

    def test_setup_reports_expected_name(self):
        completed = self._run_harness_script("setup.py", "--name")
        self.assertEqual(completed.returncode, 0, msg=completed.stderr)
        self.assertEqual(completed.stdout.strip(), "cli-anything-zotero")

    def test_setup_reports_expected_version(self):
        completed = self._run_harness_script("setup.py", "--version")
        self.assertEqual(completed.returncode, 0, msg=completed.stderr)
        self.assertEqual(completed.stdout.strip(), "0.1.0")

    def test_skill_generator_regenerates_skill(self):
        # The generator writes to a scratch file that is removed whether or not the checks pass.
        scratch = HARNESS_ROOT / "tmp-SKILL.md"
        try:
            completed = self._run_harness_script(
                "skill_generator.py",
                str(HARNESS_ROOT),
                "--output",
                str(scratch),
            )
            self.assertEqual(completed.returncode, 0, msg=completed.stderr)
            generated = scratch.read_text(encoding="utf-8")
            for marker in ("cli-anything-zotero", "## Command Groups", "### App", "### Item"):
                self.assertIn(marker, generated)
        finally:
            scratch.unlink(missing_ok=True)
def resolve_cli() -> list[str]:
    """Return the argv prefix used to launch the CLI under test.

    Lookup order: the ``cli-anything-zotero`` command on ``PATH``, then a
    launcher inside the interpreter's scripts directory, then
    ``python -m cli_anything.zotero``.

    Raises:
        RuntimeError: If no installed command is found while
            ``CLI_ANYTHING_FORCE_INSTALLED`` is set to ``1``.
    """
    on_path = shutil.which("cli-anything-zotero")
    if on_path:
        return [on_path]

    scripts_root = Path(sysconfig.get_path("scripts"))
    for launcher_name in ("cli-anything-zotero.exe", "cli-anything-zotero"):
        launcher = scripts_root / launcher_name
        if launcher.exists():
            return [str(launcher)]

    must_be_installed = os.environ.get("CLI_ANYTHING_FORCE_INSTALLED", "").strip() == "1"
    if must_be_installed:
        raise RuntimeError("cli-anything-zotero not found in PATH. Install it with: py -m pip install -e .")
    return [sys.executable, "-m", "cli_anything.zotero"]


def uses_module_fallback(cli_base: list[str]) -> bool:
    """Report whether ``cli_base`` has at least three parts with ``-m`` in second position."""
    if len(cli_base) < 3:
        return False
    return cli_base[1] == "-m"
= self.run_cli(["--help"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("collection", result.stdout) + self.assertIn("item", result.stdout) + self.assertIn("import", result.stdout) + self.assertIn("note", result.stdout) + self.assertIn("session", result.stdout) + + def test_dispatch_uses_requested_prog_name(self): + result = dispatch(["--help"], prog_name="cli-anything-zotero") + self.assertEqual(result, 0) + + def test_force_installed_mode_requires_real_command(self): + with tempfile.TemporaryDirectory() as tmpdir: + with mock.patch.dict("os.environ", {"CLI_ANYTHING_FORCE_INSTALLED": "1"}, clear=False): + with mock.patch("shutil.which", return_value=None): + with mock.patch("sysconfig.get_path", return_value=tmpdir): + with self.assertRaises(RuntimeError): + resolve_cli() + + def test_repl_help_text_mentions_builtins(self): + self.assertIn("use-selected", repl_help_text()) + self.assertIn("current-item", repl_help_text()) + + def test_default_entrypoint_starts_repl(self): + result = self.run_cli([], input_text="exit\n") + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("cli-anything-zotero", result.stdout) + + def test_app_status_json(self): + result = self.run_cli(["--json", "app", "status"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"sqlite_exists": true', result.stdout) + + def test_app_enable_local_api_json(self): + result = self.run_cli(["--json", "app", "enable-local-api"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"enabled": true', result.stdout) + self.assertIn('"already_enabled": false', result.stdout) + + def test_collection_list_json(self): + result = self.run_cli(["--json", "collection", "list"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("Sample Collection", result.stdout) + + def test_collection_find_json(self): + result = self.run_cli(["--json", "collection", "find", "sample"]) + 
self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("COLLAAAA", result.stdout) + + def test_item_get_json(self): + result = self.run_cli(["--json", "item", "get", "REG12345"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("Sample Title", result.stdout) + + def test_item_find_and_notes_json(self): + with fake_zotero_http_server() as server: + result = self.run_cli( + ["--json", "item", "find", "Sample", "--collection", "COLLAAAA"], + extra_env={"ZOTERO_HTTP_PORT": str(server["port"])}, + ) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("REG12345", result.stdout) + + notes_result = self.run_cli(["--json", "item", "notes", "REG12345"]) + self.assertEqual(notes_result.returncode, 0, msg=notes_result.stderr) + self.assertIn("Example note", notes_result.stdout) + + def test_note_get_and_add(self): + result = self.run_cli(["--json", "note", "get", "NOTEKEY"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("Example note", result.stdout) + + with fake_zotero_http_server() as server: + add_result = self.run_cli( + ["--json", "note", "add", "REG12345", "--text", "A new note", "--format", "text"], + extra_env={"ZOTERO_HTTP_PORT": str(server["port"])}, + ) + self.assertEqual(add_result.returncode, 0, msg=add_result.stderr) + self.assertIn('"action": "note_add"', add_result.stdout) + + def test_item_context_and_analyze(self): + result = self.run_cli(["--json", "item", "context", "REG12345", "--include-notes", "--include-links"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"prompt_context"', result.stdout) + self.assertIn('"doi_url"', result.stdout) + + with fake_zotero_http_server() as server: + analyze_result = self.run_cli( + ["--json", "item", "analyze", "REG12345", "--question", "Summarize", "--model", "gpt-test"], + extra_env={ + "OPENAI_API_KEY": "test-key", + "CLI_ANYTHING_ZOTERO_OPENAI_URL": 
f"http://127.0.0.1:{server['port']}/v1/responses", + }, + ) + self.assertEqual(analyze_result.returncode, 0, msg=analyze_result.stderr) + self.assertIn('"answer": "Analysis text"', analyze_result.stdout) + + def test_session_status_json(self): + self.run_cli(["session", "use-item", "REG12345"]) + result = self.run_cli(["--json", "session", "status"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"current_item": "REG12345"', result.stdout) + + def test_session_use_library_normalizes_tree_view_library_ref(self): + result = self.run_cli(["--json", "session", "use-library", "L2"]) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"current_library": 2', result.stdout) + + def test_group_library_routes_use_group_scope(self): + with fake_zotero_http_server() as server: + extra_env = {"ZOTERO_HTTP_PORT": str(server["port"])} + use_library = self.run_cli(["--json", "session", "use-library", "L2"], extra_env=extra_env) + self.assertEqual(use_library.returncode, 0, msg=use_library.stderr) + + find_result = self.run_cli( + ["--json", "item", "find", "Group", "--collection", "GCOLLAAA"], + extra_env=extra_env, + ) + self.assertEqual(find_result.returncode, 0, msg=find_result.stderr) + self.assertIn("GROUPKEY", find_result.stdout) + + export_result = self.run_cli(["--json", "item", "export", "GROUPKEY", "--format", "ris"], extra_env=extra_env) + self.assertEqual(export_result.returncode, 0, msg=export_result.stderr) + self.assertIn("GROUPKEY", export_result.stdout) + + citation_result = self.run_cli( + ["--json", "item", "citation", "GROUPKEY", "--style", "apa", "--locale", "en-US"], + extra_env=extra_env, + ) + self.assertEqual(citation_result.returncode, 0, msg=citation_result.stderr) + self.assertIn("citation", citation_result.stdout) + + bibliography_result = self.run_cli( + ["--json", "item", "bibliography", "GROUPKEY", "--style", "apa", "--locale", "en-US"], + extra_env=extra_env, + ) + 
self.assertEqual(bibliography_result.returncode, 0, msg=bibliography_result.stderr) + self.assertIn("bibliography", bibliography_result.stdout) + + search_result = self.run_cli(["--json", "search", "items", "GSEARCHKEY"], extra_env=extra_env) + self.assertEqual(search_result.returncode, 0, msg=search_result.stderr) + self.assertIn("GROUPKEY", search_result.stdout) + + get_paths = [entry["path"] for entry in server["calls"] if entry["method"] == "GET"] + self.assertTrue(any("/api/groups/2/collections/GCOLLAAA/items/top" in path for path in get_paths)) + self.assertTrue(any("/api/groups/2/items/GROUPKEY?format=ris" in path for path in get_paths)) + self.assertTrue(any("/api/groups/2/items/GROUPKEY?format=json&include=citation" in path for path in get_paths)) + self.assertTrue(any("/api/groups/2/items/GROUPKEY?format=json&include=bib" in path for path in get_paths)) + self.assertTrue(any("/api/groups/2/searches/GSEARCHKEY/items?format=json" in path for path in get_paths)) + + def test_import_file_subprocess(self): + import_path = Path(self.tmpdir.name) / "sample.ris" + import_path.write_text("TY - JOUR\nTI - Imported Sample\nER - \n", encoding="utf-8") + with fake_zotero_http_server() as server: + result = self.run_cli( + ["--json", "import", "file", str(import_path), "--collection", "COLLAAAA", "--tag", "alpha"], + extra_env={"ZOTERO_HTTP_PORT": str(server["port"])}, + ) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"action": "import_file"', result.stdout) + self.assertIn('"treeViewID": "C1"', result.stdout) + + def test_import_json_subprocess(self): + import_path = Path(self.tmpdir.name) / "items.json" + import_path.write_text('[{"itemType": "journalArticle", "title": "Imported JSON"}]', encoding="utf-8") + with fake_zotero_http_server() as server: + result = self.run_cli( + ["--json", "import", "json", str(import_path), "--collection", "COLLAAAA", "--tag", "beta"], + extra_env={"ZOTERO_HTTP_PORT": str(server["port"])}, + ) + 
self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"action": "import_json"', result.stdout) + self.assertIn('"submitted_count": 1', result.stdout) + + def test_import_json_subprocess_with_inline_file_attachment(self): + pdf_path = Path(self.tmpdir.name) / "inline.pdf" + pdf_path.write_bytes(sample_pdf_bytes("subprocess-inline")) + import_path = Path(self.tmpdir.name) / "items-with-attachment.json" + title = "Imported JSON Attachment" + import_path.write_text( + json.dumps( + [ + { + "itemType": "journalArticle", + "title": title, + "attachments": [{"path": str(pdf_path)}], + } + ] + ), + encoding="utf-8", + ) + with fake_zotero_http_server(sqlite_path=self.env_paths["sqlite_path"], data_dir=self.env_paths["data_dir"]) as server: + result = self.run_cli( + ["--json", "import", "json", str(import_path), "--collection", "COLLAAAA"], + extra_env={"ZOTERO_HTTP_PORT": str(server["port"])}, + ) + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"created_count": 1', result.stdout) + + find_result = self.run_cli(["--json", "item", "find", title, "--exact-title"]) + self.assertEqual(find_result.returncode, 0, msg=find_result.stderr) + imported_items = json.loads(find_result.stdout) + self.assertTrue(imported_items) + imported_item_id = str(imported_items[0]["itemID"]) + + attachments_result = self.run_cli(["--json", "item", "attachments", imported_item_id]) + self.assertEqual(attachments_result.returncode, 0, msg=attachments_result.stderr) + attachments = json.loads(attachments_result.stdout) + self.assertTrue(attachments) + self.assertTrue(attachments[0].get("resolvedPath", "").endswith(".pdf")) + + file_result = self.run_cli(["--json", "item", "file", imported_item_id]) + self.assertEqual(file_result.returncode, 0, msg=file_result.stderr) + item_file = json.loads(file_result.stdout) + self.assertTrue(item_file.get("exists")) + self.assertTrue(item_file.get("resolvedPath", "").endswith(".pdf")) + + def 
test_import_json_subprocess_with_url_attachment(self): + title = "Imported URL Attachment" + import_path = Path(self.tmpdir.name) / "items-with-url.json" + with fake_zotero_http_server(sqlite_path=self.env_paths["sqlite_path"], data_dir=self.env_paths["data_dir"]) as server: + import_path.write_text( + json.dumps( + [ + { + "itemType": "journalArticle", + "title": title, + "attachments": [{"url": f"http://127.0.0.1:{server['port']}/downloads/sample.pdf"}], + } + ] + ), + encoding="utf-8", + ) + result = self.run_cli( + ["--json", "import", "json", str(import_path), "--collection", "COLLAAAA"], + extra_env={"ZOTERO_HTTP_PORT": str(server["port"])}, + ) + attachment_calls = [entry for entry in server["calls"] if entry["path"].startswith("/connector/saveAttachment")] + + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn('"created_count": 1', result.stdout) + self.assertEqual(len(attachment_calls), 1) + self.assertEqual(attachment_calls[0]["metadata"]["url"], f"http://127.0.0.1:{server['port']}/downloads/sample.pdf") + + def test_import_file_subprocess_with_attachment_manifest(self): + ris_path = Path(self.tmpdir.name) / "manifest-import.ris" + ris_path.write_text("TY - JOUR\nTI - Imported Manifest Attachment\nER - \n", encoding="utf-8") + pdf_path = Path(self.tmpdir.name) / "manifest.pdf" + pdf_path.write_bytes(sample_pdf_bytes("manifest")) + manifest_path = Path(self.tmpdir.name) / "attachments-manifest.json" + manifest_path.write_text( + json.dumps([{"index": 0, "attachments": [{"path": str(pdf_path)}]}]), + encoding="utf-8", + ) + with fake_zotero_http_server(sqlite_path=self.env_paths["sqlite_path"], data_dir=self.env_paths["data_dir"]) as server: + result = self.run_cli( + [ + "--json", + "import", + "file", + str(ris_path), + "--collection", + "COLLAAAA", + "--attachments-manifest", + str(manifest_path), + ], + extra_env={"ZOTERO_HTTP_PORT": str(server["port"])}, + ) + self.assertEqual(result.returncode, 0, msg=result.stderr) + 
def test_import_json_subprocess_partial_success_returns_nonzero(self):
    """One existing and one missing attachment path should give exit code 1 and a partial_success payload."""
    workdir = Path(self.tmpdir.name)

    present_pdf = workdir / "partial.pdf"
    present_pdf.write_bytes(sample_pdf_bytes("partial"))
    absent_pdf = workdir / "missing.pdf"  # never written to disk

    record = {
        "itemType": "journalArticle",
        "title": "Imported Partial",
        "attachments": [
            {"path": str(present_pdf)},
            {"path": str(absent_pdf)},
        ],
    }
    import_path = workdir / "partial-items.json"
    import_path.write_text(json.dumps([record]), encoding="utf-8")

    argv = ["--json", "import", "json", str(import_path), "--collection", "COLLAAAA"]
    with fake_zotero_http_server(
        sqlite_path=self.env_paths["sqlite_path"],
        data_dir=self.env_paths["data_dir"],
    ) as server:
        result = self.run_cli(argv, extra_env={"ZOTERO_HTTP_PORT": str(server["port"])})

    self.assertEqual(result.returncode, 1, msg=result.stderr)
    for expected_fragment in ('"status": "partial_success"', '"failed_count": 1'):
        self.assertIn(expected_fragment, result.stdout)
class PathDiscoveryTests(unittest.TestCase):
    """Exercise ``zotero_paths`` discovery helpers against throwaway directory trees."""

    def test_build_environment_uses_active_profile_and_data_dir_pref(self):
        """An explicit profile root plus executable resolves to the sample profile, data dir, database and version."""
        with tempfile.TemporaryDirectory() as tmpdir:
            sample = create_sample_environment(Path(tmpdir))
            resolved = zotero_paths.build_environment(
                explicit_profile_dir=str(sample["profile_root"]),
                explicit_executable=str(sample["executable"]),
            )
            for attribute in ("profile_dir", "data_dir", "sqlite_path"):
                self.assertEqual(getattr(resolved, attribute), sample[attribute])
            self.assertEqual(resolved.version, "7.0.32")

    def test_build_environment_accepts_env_profile_dir_pointing_to_profile(self):
        """``ZOTERO_PROFILE_DIR`` may point straight at a profile directory."""
        with tempfile.TemporaryDirectory() as tmpdir:
            sample = create_sample_environment(Path(tmpdir))
            env_override = {"ZOTERO_PROFILE_DIR": str(sample["profile_dir"])}
            with mock.patch.dict("os.environ", env_override, clear=False):
                resolved = zotero_paths.build_environment(
                    explicit_executable=str(sample["executable"]),
                    explicit_data_dir=str(sample["data_dir"]),
                )
            self.assertEqual(resolved.profile_dir, sample["profile_dir"])

    def test_build_environment_falls_back_to_home_zotero(self):
        """With an empty ``prefs.js`` the data dir is expected to be ``<home>/Zotero``."""
        with tempfile.TemporaryDirectory() as tmpdir:
            base = Path(tmpdir)

            profile_root = base / "AppData" / "Roaming" / "Zotero" / "Zotero"
            profile_dir = profile_root / "Profiles" / "test.default"
            profile_dir.mkdir(parents=True, exist_ok=True)
            ini_path = profile_root / "profiles.ini"
            ini_path.write_text("[Profile0]\nName=default\nIsRelative=1\nPath=Profiles/test.default\nDefault=1\n", encoding="utf-8")
            prefs_path = profile_dir / "prefs.js"
            prefs_path.write_text("", encoding="utf-8")

            home = base / "Home"
            expected_data_dir = home / "Zotero"
            expected_data_dir.mkdir(parents=True, exist_ok=True)

            # Redirect Path.home() as seen by zotero_paths to the fake home directory.
            with mock.patch("cli_anything.zotero.utils.zotero_paths.Path.home", return_value=home):
                resolved = zotero_paths.build_environment(explicit_profile_dir=str(profile_root))
            self.assertEqual(resolved.data_dir, home / "Zotero")

    def test_ensure_local_api_enabled_writes_user_js(self):
        """The returned path must exist as text mentioning the local API preference key."""
        with tempfile.TemporaryDirectory() as tmpdir:
            sample = create_sample_environment(Path(tmpdir))
            written = zotero_paths.ensure_local_api_enabled(sample["profile_dir"])
            self.assertIsNotNone(written)
            contents = written.read_text(encoding="utf-8")
            self.assertIn('extensions.zotero.httpServer.localAPI.enabled', contents)
def test_fetch_item_children_and_attachments(self):
    """Check child counts and attachment path resolution for two sample items."""
    db_path = self.env["sqlite_path"]

    children = zotero_sqlite.fetch_item_children(db_path, "REG12345")
    self.assertEqual(len(children), 2)
    stored = zotero_sqlite.fetch_item_attachments(db_path, "REG12345")
    self.assertEqual(len(stored), 1)
    stored_path = zotero_sqlite.resolve_attachment_real_path(stored[0], self.env["data_dir"])
    self.assertTrue(str(stored_path).endswith("paper.pdf"))

    # The second sample item is expected to resolve to an absolute Windows-style path.
    linked = zotero_sqlite.fetch_item_attachments(db_path, "REG67890")
    self.assertEqual(len(linked), 1)
    linked_path = zotero_sqlite.resolve_attachment_real_path(linked[0], self.env["data_dir"])
    self.assertEqual(linked_path, "C:\\Users\\Public\\linked.pdf")
def test_cross_library_unique_key_still_resolves_without_session_context(self):
    """Keys looked up without a ``library_id`` should resolve to records in library 2."""
    db_path = self.env["sqlite_path"]
    lookups = (
        (zotero_sqlite.resolve_item, "GROUPKEY"),
        (zotero_sqlite.resolve_collection, "GCOLLAAA"),
    )
    for resolver, key in lookups:
        record = resolver(db_path, key)
        self.assertEqual(record["libraryID"], 2)
class SessionTests(unittest.TestCase):
    """Round-trip and alias-expansion checks for session state helpers."""

    def test_save_and_load_session_state(self):
        """State saved under a temporary state directory is returned by the next load."""
        with tempfile.TemporaryDirectory() as tmpdir, mock.patch.dict(
            "os.environ", {"CLI_ANYTHING_ZOTERO_STATE_DIR": tmpdir}, clear=False
        ):
            snapshot = session_mod.default_session_state()
            snapshot["current_item"] = "REG12345"
            session_mod.save_session_state(snapshot)
            loaded = session_mod.load_session_state()
        self.assertEqual(loaded["current_item"], "REG12345")

    def test_expand_repl_aliases(self):
        """``@item`` and ``@collection`` are replaced by the values held in the state mapping."""
        state = {"current_library": "1", "current_collection": "2", "current_item": "REG12345"}
        argv = ["item", "get", "@item", "@collection"]
        self.assertEqual(
            session_mod.expand_repl_aliases_with_state(argv, state),
            ["item", "get", "REG12345", "2"],
        )

    def test_normalize_library_ref_accepts_plain_and_tree_view_ids(self):
        """String, ``L``-prefixed and integer references all normalize to an integer id."""
        cases = (("1", 1), ("L1", 1), (2, 2))
        for reference, expected in cases:
            self.assertEqual(zotero_sqlite.normalize_library_ref(reference), expected)
def test_enable_local_api_reports_idempotent_state(self):
    """The first call enables the local API; a call on a rebuilt runtime reports it as already enabled."""
    first = imports_mod.enable_local_api(self.runtime)
    self.assertTrue(first["enabled"])
    self.assertFalse(first["already_enabled"])
    user_js = Path(first["user_js_path"])
    self.assertTrue(user_js.exists())

    # Rebuild the runtime context from the same sample paths before the second call.
    context_kwargs = {
        name: str(self.env[name])
        for name in ("data_dir", "profile_dir", "executable")
    }
    rebuilt_runtime = discovery.build_runtime_context(**context_kwargs)
    second = imports_mod.enable_local_api(rebuilt_runtime)
    self.assertTrue(second["already_enabled"])
def test_import_json_strips_inline_attachments_and_uploads_local_pdf(self):
    """Inline ``attachments`` must not reach ``connector_save_items``; the PDF goes through ``connector_save_attachment``."""
    workdir = Path(self.tmpdir.name)
    pdf_path = workdir / "inline.pdf"
    pdf_path.write_bytes(sample_pdf_bytes("inline"))

    # Double any backslashes so the path stays valid inside the hand-written JSON text.
    escaped_pdf = str(pdf_path).replace("\\", "\\\\")
    json_path = workdir / "items.json"
    json_path.write_text(
        '[{"itemType": "journalArticle", "title": "Imported", "attachments": [{"path": "%s"}]}]' % escaped_pdf,
        encoding="utf-8",
    )

    connector_on = mock.patch.object(self.runtime, "connector_available", True)
    patch_save_items = mock.patch("cli_anything.zotero.utils.zotero_http.connector_save_items")
    patch_update_session = mock.patch("cli_anything.zotero.utils.zotero_http.connector_update_session")
    patch_save_attachment = mock.patch("cli_anything.zotero.utils.zotero_http.connector_save_attachment")
    with connector_on, patch_save_items as save_items, patch_update_session, patch_save_attachment as save_attachment:
        payload = imports_mod.import_json(
            self.runtime,
            json_path,
            attachment_timeout=91,
        )

    submitted_items = save_items.call_args.args[1]
    first_item = submitted_items[0]
    self.assertNotIn("attachments", first_item)
    self.assertEqual(payload["attachment_summary"]["created_count"], 1)
    self.assertEqual(payload["status"], "success")

    save_attachment.assert_called_once()
    upload_kwargs = save_attachment.call_args.kwargs
    self.assertEqual(upload_kwargs["parent_item_id"], first_item["id"])
    self.assertEqual(upload_kwargs["timeout"], 91)
    self.assertTrue(upload_kwargs["url"].startswith("file:///"))
    self.assertTrue(upload_kwargs["content"].startswith(b"%PDF-"))
def test_import_json_rejects_invalid_inline_attachment_schema(self):
    """An attachment entry carrying both ``path`` and ``url`` is expected to raise ``RuntimeError``."""
    broken_item = {
        "itemType": "journalArticle",
        "title": "Broken",
        "attachments": [{"path": "a.pdf", "url": "https://example.com/a.pdf"}],
    }
    json_path = Path(self.tmpdir.name) / "invalid-attachments.json"
    json_path.write_text(json.dumps([broken_item]), encoding="utf-8")

    with mock.patch.object(self.runtime, "connector_available", True), self.assertRaises(RuntimeError):
        imports_mod.import_json(self.runtime, json_path)
"attachments.json" + manifest_path.write_text( + json.dumps( + [ + { + "index": 0, + "attachments": [ + {"path": str(pdf_path)}, + {"path": str(Path(self.tmpdir.name) / "missing.pdf")}, + ], + } + ] + ), + encoding="utf-8", + ) + + with mock.patch.object(self.runtime, "connector_available", True): + with mock.patch( + "cli_anything.zotero.utils.zotero_http.connector_import_text", + return_value=[{"id": "imported-1", "title": "Imported Title"}], + ): + with mock.patch("cli_anything.zotero.utils.zotero_http.connector_update_session"): + with mock.patch("cli_anything.zotero.utils.zotero_http.connector_save_attachment") as save_attachment: + payload = imports_mod.import_file( + self.runtime, + ris_path, + attachments_manifest=manifest_path, + ) + + save_attachment.assert_called_once() + self.assertEqual(payload["status"], "partial_success") + self.assertEqual(payload["attachment_summary"]["created_count"], 1) + self.assertEqual(payload["attachment_summary"]["failed_count"], 1) + self.assertIn("Attachment file not found", payload["attachment_results"][1]["error"]) + + def test_import_file_manifest_title_mismatch_marks_attachment_failure(self): + ris_path = Path(self.tmpdir.name) / "sample.ris" + ris_path.write_text("TY - JOUR\nTI - Imported Title\nER - \n", encoding="utf-8") + pdf_path = Path(self.tmpdir.name) / "manifest.pdf" + pdf_path.write_bytes(sample_pdf_bytes("manifest")) + manifest_path = Path(self.tmpdir.name) / "attachments.json" + manifest_path.write_text( + json.dumps( + [ + { + "index": 0, + "expected_title": "Different Title", + "attachments": [{"path": str(pdf_path)}], + } + ] + ), + encoding="utf-8", + ) + + with mock.patch.object(self.runtime, "connector_available", True): + with mock.patch( + "cli_anything.zotero.utils.zotero_http.connector_import_text", + return_value=[{"id": "imported-1", "title": "Imported Title"}], + ): + with mock.patch("cli_anything.zotero.utils.zotero_http.connector_update_session"): + with 
mock.patch("cli_anything.zotero.utils.zotero_http.connector_save_attachment") as save_attachment: + payload = imports_mod.import_file( + self.runtime, + ris_path, + attachments_manifest=manifest_path, + ) + + save_attachment.assert_not_called() + self.assertEqual(payload["status"], "partial_success") + self.assertIn("title mismatch", payload["attachment_results"][0]["error"]) + + def test_import_file_manifest_index_out_of_range_and_missing_connector_id_fail_cleanly(self): + ris_path = Path(self.tmpdir.name) / "sample.ris" + ris_path.write_text("TY - JOUR\nTI - Imported Title\nER - \n", encoding="utf-8") + pdf_path = Path(self.tmpdir.name) / "manifest.pdf" + pdf_path.write_bytes(sample_pdf_bytes("manifest")) + manifest_path = Path(self.tmpdir.name) / "attachments.json" + manifest_path.write_text( + json.dumps( + [ + {"index": 1, "attachments": [{"path": str(pdf_path)}]}, + {"index": 0, "attachments": [{"path": str(pdf_path)}]}, + ] + ), + encoding="utf-8", + ) + + with mock.patch.object(self.runtime, "connector_available", True): + with mock.patch( + "cli_anything.zotero.utils.zotero_http.connector_import_text", + return_value=[{"title": "Imported Title"}], + ): + with mock.patch("cli_anything.zotero.utils.zotero_http.connector_update_session"): + with mock.patch("cli_anything.zotero.utils.zotero_http.connector_save_attachment") as save_attachment: + payload = imports_mod.import_file( + self.runtime, + ris_path, + attachments_manifest=manifest_path, + ) + + save_attachment.assert_not_called() + self.assertEqual(payload["attachment_summary"]["failed_count"], 2) + self.assertIn("index 1", payload["attachment_results"][0]["error"]) + self.assertIn("did not include a connector id", payload["attachment_results"][1]["error"]) + + def test_import_json_rejects_invalid_json(self): + json_path = Path(self.tmpdir.name) / "bad.json" + json_path.write_text("{not-valid", encoding="utf-8") + with mock.patch.object(self.runtime, "connector_available", True): + with 
def test_import_requires_connector(self):
    """``import_json`` raises ``RuntimeError`` when the runtime reports the connector as unavailable."""
    json_path = Path(self.tmpdir.name) / "items.json"
    json_path.write_text("[]", encoding="utf-8")

    connector_off = mock.patch.object(self.runtime, "connector_available", False)
    with connector_off:
        self.assertRaises(RuntimeError, imports_mod.import_json, self.runtime, json_path)
def test_note_add_builds_child_note_payload(self):
    """Adding a markdown note submits one child ``note`` item for the parent key.

    The connector and the selected-collection lookup are mocked, so the test
    only inspects the item handed to ``connector_save_items`` and the payload
    returned by ``add_note``.
    """
    with mock.patch.object(self.runtime, "connector_available", True):
        with mock.patch("cli_anything.zotero.utils.zotero_http.get_selected_collection", return_value={"libraryID": 1}):
            with mock.patch("cli_anything.zotero.utils.zotero_http.connector_save_items") as save_items:
                payload = notes_mod.add_note(
                    self.runtime,
                    "REG12345",
                    text="# Heading\n\nA **bold** note",
                    fmt="markdown",
                )
    save_items.assert_called_once()
    # connector_save_items receives the item list as its second positional argument.
    submitted = save_items.call_args.args[1][0]
    self.assertEqual(submitted["itemType"], "note")
    self.assertEqual(submitted["parentItem"], "REG12345")
    # NOTE(review): the expected substring was lost in extraction (the literal was
    # split across lines, leaving an unterminated string). It is presumed to be a
    # block-level HTML tag such as "<p>" -- confirm against notes.add_note output.
    self.assertIn("<p>", submitted["note"])
    self.assertEqual(payload["parentItemKey"], "REG12345")
self.assertEqual(created["action"], "collection_create") + + added = experimental.add_item_to_collection(self.runtime, "REG12345", "COLLBBBB") + self.assertEqual(added["action"], "item_add_to_collection") + + moved = experimental.move_item_to_collection( + self.runtime, + "REG67890", + "COLLAAAA", + from_refs=["COLLBBBB"], + ) + self.assertEqual(moved["action"], "item_move_to_collection") + + def test_rendering_uses_group_library_local_api_scope(self): + with mock.patch.object(self.runtime, "local_api_available", True): + with mock.patch("cli_anything.zotero.utils.zotero_http.local_api_get_text", return_value="TY - JOUR\nER - \n") as get_text: + export_payload = rendering.export_item(self.runtime, "GROUPKEY", "ris", session={"current_library": 2}) + self.assertEqual(export_payload["libraryID"], 2) + self.assertIn("/api/groups/2/items/GROUPKEY", get_text.call_args.args[1]) + + +class OpenAIUtilityTests(unittest.TestCase): + def test_extract_text_from_response_payload(self): + payload = { + "id": "resp_1", + "output": [ + { + "type": "message", + "content": [ + {"type": "output_text", "text": "Hello world"}, + ], + } + ], + } + result = openai_api._extract_text(payload) + self.assertEqual(result, "Hello world") diff --git a/zotero/agent-harness/cli_anything/zotero/tests/test_full_e2e.py b/zotero/agent-harness/cli_anything/zotero/tests/test_full_e2e.py new file mode 100644 index 000000000..45c09a6d1 --- /dev/null +++ b/zotero/agent-harness/cli_anything/zotero/tests/test_full_e2e.py @@ -0,0 +1,351 @@ +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import sys +import sysconfig +import tempfile +import unittest +import uuid +from pathlib import Path + +from cli_anything.zotero.core import discovery +from cli_anything.zotero.tests._helpers import sample_pdf_bytes +from cli_anything.zotero.utils import zotero_paths, zotero_sqlite + + +REPO_ROOT = Path(__file__).resolve().parents[4] + + +def resolve_cli() -> list[str]: + 
def resolve_cli(
    command: str = "cli-anything-zotero",
    module: str = "cli_anything.zotero",
) -> list[str]:
    """Return the argv prefix used to invoke the CLI under test.

    Lookup order:

    1. ``command`` on ``PATH`` (``shutil.which``).
    2. ``command`` (with or without ``.exe``) in the interpreter's scripts
       directory, which is not always on ``PATH``.
    3. ``[sys.executable, "-m", module]`` as a source-tree fallback.

    Args:
        command: Name of the installed console script.
        module: Module executed with ``python -m`` when no script is found.

    Returns:
        A list that can be concatenated with the CLI arguments.

    Raises:
        RuntimeError: If no installed script was found and the environment
            variable ``CLI_ANYTHING_FORCE_INSTALLED`` is set to ``1``.
    """
    on_path = shutil.which(command)
    if on_path:
        return [on_path]

    scripts_dir = Path(sysconfig.get_path("scripts"))
    for candidate in (scripts_dir / f"{command}.exe", scripts_dir / command):
        if candidate.exists():
            return [str(candidate)]

    # Only consulted once every "installed" location has been ruled out.
    if os.environ.get("CLI_ANYTHING_FORCE_INSTALLED", "").strip() == "1":
        raise RuntimeError(f"{command} not found in PATH. Install it with: py -m pip install -e .")
    return [sys.executable, "-m", module]


def uses_module_fallback(cli_base: list[str]) -> bool:
    """Return True when ``cli_base`` has the ``[python, "-m", module]`` shape."""
    return len(cli_base) >= 3 and cli_base[1] == "-m"
@unittest.skipUnless(HAS_LOCAL_DATA, "Local Zotero data directory not found")
class ZoteroFullE2E(unittest.TestCase):
    """End-to-end tests that drive the CLI as a subprocess against a live Zotero.

    The whole class is skipped when no local Zotero data directory exists.
    Tests that write to the library are opt-in through
    ``CLI_ANYTHING_ZOTERO_ENABLE_WRITE_E2E=1``.
    """

    CLI_BASE = resolve_cli()

    @classmethod
    def setUpClass(cls) -> None:
        """Enable the live API and launch Zotero if the connector is down."""
        discovery.ensure_live_api_enabled()
        runtime = discovery.build_runtime_context()
        if not runtime.connector_available:
            discovery.launch_zotero(runtime, wait_timeout=45)
        # Rebuild so the stored context reflects the state after a launch.
        cls.runtime = discovery.build_runtime_context()

    def run_cli(self, args):
        """Run the CLI with ``args`` and return the ``CompletedProcess``."""
        env = os.environ.copy()
        if uses_module_fallback(self.CLI_BASE):
            harness_root = str(REPO_ROOT / "zotero" / "agent-harness")
            inherited = env.get("PYTHONPATH", "")
            # Skip empty parts: a trailing separator would silently add the
            # current working directory to the child's sys.path.
            env["PYTHONPATH"] = os.pathsep.join(part for part in (harness_root, inherited) if part)
        return subprocess.run(self.CLI_BASE + args, capture_output=True, text=True, env=env, timeout=60)

    def run_cli_with_retry(self, args, retries: int = 2, delay: float = 0.5):
        """Run the CLI until it exits with 0 or the attempts are used up.

        Args:
            args: CLI arguments appended to ``CLI_BASE``.
            retries: Maximum number of attempts; values below 1 still run once
                so a result object is always returned.
            delay: Seconds to pause between failed attempts.

        Returns:
            The ``CompletedProcess`` of the first successful or the last attempt.
        """
        import time

        attempts = max(1, retries)
        for attempt in range(attempts):
            last = self.run_cli(args)
            if last.returncode == 0 or attempt == attempts - 1:
                break
            time.sleep(delay)
        return last

    def test_sqlite_inventory_commands(self):
        result = self.run_cli(["--json", "collection", "list"])
        self.assertEqual(result.returncode, 0, msg=result.stderr)
        self.assertIn("collectionName", result.stdout)

        result = self.run_cli(["--json", "item", "list"])
        self.assertEqual(result.returncode, 0, msg=result.stderr)
        self.assertIn("itemID", result.stdout)

        result = self.run_cli(["--json", "style", "list"])
        self.assertEqual(result.returncode, 0, msg=result.stderr)
        self.assertIn("title", result.stdout)

        result = self.run_cli(["--json", "search", "list"])
        self.assertEqual(result.returncode, 0, msg=result.stderr)

    @unittest.skipUnless(SAMPLE_ITEM is not None, "No regular Zotero item found")
    def test_item_find_and_context_commands(self):
        assert SAMPLE_ITEM is not None
        title = zotero_sqlite.resolve_item(ENVIRONMENT.sqlite_path, SAMPLE_ITEM["itemID"])["title"]
        query = title.split()[0]

        item_find = self.run_cli(["--json", "item", "find", query, "--limit", "5"])
        self.assertEqual(item_find.returncode, 0, msg=item_find.stderr)
        self.assertIn(SAMPLE_ITEM["key"], item_find.stdout)

        exact_find = self.run_cli(["--json", "item", "find", title, "--exact-title"])
        self.assertEqual(exact_find.returncode, 0, msg=exact_find.stderr)
        self.assertIn(SAMPLE_ITEM["key"], exact_find.stdout)

        context_result = self.run_cli(["--json", "item", "context", str(SAMPLE_ITEM["itemID"]), "--include-links"])
        self.assertEqual(context_result.returncode, 0, msg=context_result.stderr)
        self.assertIn('"prompt_context"', context_result.stdout)

    @unittest.skipUnless(ATTACHMENT_SAMPLE_ITEM is not None, "No Zotero item with attachments found")
    def test_attachment_inventory_commands(self):
        assert ATTACHMENT_SAMPLE_ITEM is not None
        attachments = self.run_cli(["--json", "item", "attachments", str(ATTACHMENT_SAMPLE_ITEM["itemID"])])
        self.assertEqual(attachments.returncode, 0, msg=attachments.stderr)
        attachment_data = json.loads(attachments.stdout)
        self.assertTrue(attachment_data)
        self.assertTrue(attachment_data[0].get("resolvedPath"))

        item_file = self.run_cli(["--json", "item", "file", str(ATTACHMENT_SAMPLE_ITEM["itemID"])])
        self.assertEqual(item_file.returncode, 0, msg=item_file.stderr)
        item_file_data = json.loads(item_file.stdout)
        self.assertTrue(item_file_data.get("exists"))
        self.assertTrue(item_file_data.get("resolvedPath"))

    @unittest.skipUnless(NOTE_SAMPLE_ITEM is not None, "No Zotero item with notes found")
    def test_note_inventory_commands(self):
        assert NOTE_SAMPLE_ITEM is not None
        item_notes = self.run_cli(["--json", "item", "notes", str(NOTE_SAMPLE_ITEM["itemID"])])
        self.assertEqual(item_notes.returncode, 0, msg=item_notes.stderr)
        item_notes_data = json.loads(item_notes.stdout)
        self.assertTrue(item_notes_data)
        note_key = item_notes_data[0]["key"]

        note_get = self.run_cli(["--json", "note", "get", note_key])
        self.assertEqual(note_get.returncode, 0, msg=note_get.stderr)
        self.assertIn(note_key, note_get.stdout)

    def test_connector_ping(self):
        result = self.run_cli(["--json", "app", "ping"])
        self.assertEqual(result.returncode, 0, msg=result.stderr)
        self.assertIn('"connector_available": true', result.stdout)

    def test_collection_use_selected(self):
        result = self.run_cli(["--json", "collection", "use-selected"])
        self.assertEqual(result.returncode, 0, msg=result.stderr)
        self.assertIn("libraryID", result.stdout)

    @unittest.skipUnless(SAMPLE_COLLECTION is not None, "No Zotero collection found")
    def test_collection_detail_commands(self):
        # Same narrowing guard the sibling tests use after their skip decorator.
        assert SAMPLE_COLLECTION is not None
        collection_key = SAMPLE_COLLECTION["key"]

        tree = self.run_cli(["--json", "collection", "tree"])
        self.assertEqual(tree.returncode, 0, msg=tree.stderr)
        self.assertIn("children", tree.stdout)

        collection_get = self.run_cli(["--json", "collection", "get", collection_key])
        self.assertEqual(collection_get.returncode, 0, msg=collection_get.stderr)
        self.assertIn(collection_key, collection_get.stdout)

        collection_items = self.run_cli(["--json", "collection", "items", collection_key])
        self.assertEqual(collection_items.returncode, 0, msg=collection_items.stderr)

    @unittest.skipUnless(SAMPLE_TAG is not None, "No Zotero tag found")
    def test_tag_and_session_commands(self):
        tag_items = self.run_cli(["--json", "tag", "items", SAMPLE_TAG])
        self.assertEqual(tag_items.returncode, 0, msg=tag_items.stderr)
        self.assertIn("itemID", tag_items.stdout)

        if SAMPLE_COLLECTION is not None:
            session_collection = self.run_cli(["--json", "session", "use-collection", SAMPLE_COLLECTION["key"]])
            self.assertEqual(session_collection.returncode, 0, msg=session_collection.stderr)
            self.assertIn('"current_collection"', session_collection.stdout)

        if SAMPLE_ITEM is not None:
            session_item = self.run_cli(["--json", "session", "use-item", str(SAMPLE_ITEM["itemID"])])
            self.assertEqual(session_item.returncode, 0, msg=session_item.stderr)
            self.assertIn(f'"current_item": "{SAMPLE_ITEM["itemID"]}"', session_item.stdout)

    @unittest.skipUnless(SAMPLE_SEARCH is not None, "No Zotero saved search found")
    def test_search_detail_commands(self):
        assert SAMPLE_SEARCH is not None
        search_get = self.run_cli(["--json", "search", "get", str(SAMPLE_SEARCH["savedSearchID"])])
        self.assertEqual(search_get.returncode, 0, msg=search_get.stderr)
        self.assertIn(SAMPLE_SEARCH["key"], search_get.stdout)

        search_items = self.run_cli(["--json", "search", "items", str(SAMPLE_SEARCH["savedSearchID"])])
        self.assertEqual(search_items.returncode, 0, msg=search_items.stderr)

    @unittest.skipUnless(os.environ.get("CLI_ANYTHING_ZOTERO_ENABLE_WRITE_E2E") == "1", "Write E2E disabled")
    def test_opt_in_write_import_commands(self):
        target = os.environ.get("CLI_ANYTHING_ZOTERO_IMPORT_TARGET", "").strip()
        self.assertTrue(target, "CLI_ANYTHING_ZOTERO_IMPORT_TARGET must be set when write E2E is enabled")

        with tempfile.TemporaryDirectory() as tmpdir:
            ris_path = Path(tmpdir) / "import.ris"
            ris_path.write_text("TY - JOUR\nTI - CLI Anything Write E2E RIS\nER - \n", encoding="utf-8")
            ris_result = self.run_cli(["--json", "import", "file", str(ris_path), "--collection", target, "--tag", "cli-anything-e2e"])
            self.assertEqual(ris_result.returncode, 0, msg=ris_result.stderr)
            self.assertIn('"action": "import_file"', ris_result.stdout)

            json_path = Path(tmpdir) / "import.json"
            json_path.write_text(
                json.dumps([{"itemType": "journalArticle", "title": "CLI Anything Write E2E JSON"}], ensure_ascii=False),
                encoding="utf-8",
            )
            json_result = self.run_cli(["--json", "import", "json", str(json_path), "--collection", target, "--tag", "cli-anything-e2e"])
            self.assertEqual(json_result.returncode, 0, msg=json_result.stderr)
            self.assertIn('"action": "import_json"', json_result.stdout)

    @unittest.skipUnless(os.environ.get("CLI_ANYTHING_ZOTERO_ENABLE_WRITE_E2E") == "1", "Write E2E disabled")
    def test_opt_in_import_json_with_inline_attachment(self):
        target = os.environ.get("CLI_ANYTHING_ZOTERO_IMPORT_TARGET", "").strip()
        self.assertTrue(target, "CLI_ANYTHING_ZOTERO_IMPORT_TARGET must be set when write E2E is enabled")

        with tempfile.TemporaryDirectory() as tmpdir:
            # Unique title so the exact-title lookup below finds only this run's item.
            title = f"CLI Anything Attachment E2E {uuid.uuid4().hex[:8]}"
            pdf_path = Path(tmpdir) / "inline-e2e.pdf"
            pdf_path.write_bytes(sample_pdf_bytes("live-e2e"))
            json_path = Path(tmpdir) / "import-attachment.json"
            json_path.write_text(
                json.dumps(
                    [
                        {
                            "itemType": "journalArticle",
                            "title": title,
                            "attachments": [{"path": str(pdf_path)}],
                        }
                    ],
                    ensure_ascii=False,
                ),
                encoding="utf-8",
            )

            import_result = self.run_cli(
                ["--json", "import", "json", str(json_path), "--collection", target, "--tag", "cli-anything-e2e"]
            )
            self.assertEqual(import_result.returncode, 0, msg=import_result.stderr)
            self.assertIn('"created_count": 1', import_result.stdout)

            find_result = self.run_cli_with_retry(["--json", "item", "find", title, "--exact-title"], retries=4)
            self.assertEqual(find_result.returncode, 0, msg=find_result.stderr)
            imported_items = json.loads(find_result.stdout)
            self.assertTrue(imported_items)
            imported_item_id = str(imported_items[0]["itemID"])

            attachments_result = self.run_cli_with_retry(["--json", "item", "attachments", imported_item_id], retries=4)
            self.assertEqual(attachments_result.returncode, 0, msg=attachments_result.stderr)
            attachments = json.loads(attachments_result.stdout)
            self.assertTrue(attachments)
            self.assertTrue(any((attachment.get("resolvedPath") or "").lower().endswith(".pdf") for attachment in attachments))

            item_file_result = self.run_cli_with_retry(["--json", "item", "file", imported_item_id], retries=4)
            self.assertEqual(item_file_result.returncode, 0, msg=item_file_result.stderr)
            item_file = json.loads(item_file_result.stdout)
            self.assertTrue(item_file.get("exists"))
            self.assertTrue((item_file.get("resolvedPath") or "").lower().endswith(".pdf"))

    @unittest.skipUnless(os.environ.get("CLI_ANYTHING_ZOTERO_ENABLE_WRITE_E2E") == "1", "Write E2E disabled")
    @unittest.skipUnless(SAMPLE_ITEM is not None, "No regular Zotero item found")
    def test_opt_in_note_add_command(self):
        assert SAMPLE_ITEM is not None
        result = self.run_cli(["--json", "note", "add", str(SAMPLE_ITEM["itemID"]), "--text", "CLI Anything write note"])
        self.assertEqual(result.returncode, 0, msg=result.stderr)
        self.assertIn('"action": "note_add"', result.stdout)

    @unittest.skipUnless(SAMPLE_ITEM is not None, "No regular Zotero item found for export/citation tests")
    def test_item_citation_bibliography_and_exports(self):
        assert SAMPLE_ITEM is not None
        item_ref = str(SAMPLE_ITEM["itemID"])
        citation = self.run_cli_with_retry(["--json", "item", "citation", item_ref, "--style", "apa", "--locale", "en-US"])
        self.assertEqual(citation.returncode, 0, msg=citation.stderr)
        citation_data = json.loads(citation.stdout)
        self.assertTrue(citation_data.get("citation"))

        bibliography = self.run_cli_with_retry(["--json", "item", "bibliography", item_ref, "--style", "apa", "--locale", "en-US"])
        self.assertEqual(bibliography.returncode, 0, msg=bibliography.stderr)
        bibliography_data = json.loads(bibliography.stdout)
        self.assertTrue(bibliography_data.get("bibliography"))

        ris = self.run_cli_with_retry(["--json", "item", "export", item_ref, "--format", "ris"])
        self.assertEqual(ris.returncode, 0, msg=ris.stderr)
        ris_data = json.loads(ris.stdout)
        self.assertIn("TY -", ris_data["content"])

        bibtex = self.run_cli_with_retry(["--json", "item", "export", item_ref, "--format", "bibtex"])
        self.assertEqual(bibtex.returncode, 0, msg=bibtex.stderr)
        bibtex_data = json.loads(bibtex.stdout)
        self.assertIn("@", bibtex_data["content"])

        csljson = self.run_cli_with_retry(["--json", "item", "export", item_ref, "--format", "csljson"])
        self.assertEqual(csljson.returncode, 0, msg=csljson.stderr)
        csljson_data = json.loads(csljson.stdout)
        parsed = json.loads(csljson_data["content"])
        self.assertTrue(parsed)
def _extract_text(response_payload: dict[str, Any]) -> str:
    """Return the answer text contained in a Responses API payload.

    A non-blank top-level ``output_text`` string wins. Otherwise every
    non-blank ``text`` string found under ``output[*].content[*]`` is
    collected and the pieces are joined with blank lines.

    Returns:
        The stripped text, or ``""`` when the payload carries none.
    """
    output_text = response_payload.get("output_text")
    if isinstance(output_text, str) and output_text.strip():
        return output_text.strip()

    output_items = response_payload.get("output")
    if not isinstance(output_items, list):
        # Missing, null or malformed "output": nothing to collect.
        return ""

    parts: list[str] = []
    for item in output_items:
        if not isinstance(item, dict):
            continue
        contents = item.get("content")
        if not isinstance(contents, list):
            continue
        for content in contents:
            if not isinstance(content, dict):
                continue
            text = content.get("text")
            if isinstance(text, str) and text.strip():
                parts.append(text.strip())
    return "\n\n".join(parts).strip()


def create_text_response(
    *,
    api_key: str,
    model: str,
    instructions: str,
    input_text: str,
    timeout: int = 60,
) -> dict[str, Any]:
    """POST a text request to the Responses endpoint and return its answer.

    The endpoint is ``CLI_ANYTHING_ZOTERO_OPENAI_URL`` when that environment
    variable is non-blank, otherwise ``DEFAULT_RESPONSES_API_URL``.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        model: Value of the ``model`` field in the request body.
        instructions: Value of the ``instructions`` field.
        input_text: Value of the ``input`` field.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``{"response_id": ..., "answer": ..., "raw": <decoded payload>}``.

    Raises:
        RuntimeError: On an HTTP error status, a transport failure or timeout,
            a body that is not a JSON object, or a payload without text.
    """
    responses_url = os.environ.get("CLI_ANYTHING_ZOTERO_OPENAI_URL", "").strip() or DEFAULT_RESPONSES_API_URL
    payload = {
        "model": model,
        "instructions": instructions,
        "input": input_text,
    }
    request = urllib.request.Request(
        responses_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            raw_body = response.read()
    except urllib.error.HTTPError as exc:
        body = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"OpenAI Responses API returned HTTP {exc.code}: {body}") from exc
    except OSError as exc:
        # URLError is an OSError; this also covers read timeouts and socket
        # errors raised while the body is being received.
        raise RuntimeError(f"OpenAI Responses API request failed: {exc}") from exc

    try:
        # UnicodeDecodeError and json.JSONDecodeError are both ValueErrors.
        response_payload = json.loads(raw_body.decode("utf-8"))
    except ValueError as exc:
        raise RuntimeError("OpenAI Responses API returned a response body that is not valid JSON") from exc
    if not isinstance(response_payload, dict):
        raise RuntimeError("OpenAI Responses API returned a JSON payload that is not an object")

    answer = _extract_text(response_payload)
    if not answer:
        raise RuntimeError("OpenAI Responses API returned no text output")
    return {
        "response_id": response_payload.get("id"),
        "answer": answer,
        "raw": response_payload,
    }
@@ +"""cli-anything REPL Skin โ€” Unified terminal interface for all CLI harnesses. + +Copy this file into your CLI package at: + cli_anything//utils/repl_skin.py + +Usage: + from cli_anything..utils.repl_skin import ReplSkin + + skin = ReplSkin("shotcut", version="1.0.0") + skin.print_banner() # auto-detects skills/SKILL.md inside the package + prompt_text = skin.prompt(project_name="my_video.mlt", modified=True) + skin.success("Project saved") + skin.error("File not found") + skin.warning("Unsaved changes") + skin.info("Processing 24 clips...") + skin.status("Track 1", "3 clips, 00:02:30") + skin.table(headers, rows) + skin.print_goodbye() +""" + +import os +import sys + +# โ”€โ”€ ANSI color codes (no external deps for core styling) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +_RESET = "\033[0m" +_BOLD = "\033[1m" +_DIM = "\033[2m" +_ITALIC = "\033[3m" +_UNDERLINE = "\033[4m" + +# Brand colors +_CYAN = "\033[38;5;80m" # cli-anything brand cyan +_CYAN_BG = "\033[48;5;80m" +_WHITE = "\033[97m" +_GRAY = "\033[38;5;245m" +_DARK_GRAY = "\033[38;5;240m" +_LIGHT_GRAY = "\033[38;5;250m" + +# Software accent colors โ€” each software gets a unique accent +_ACCENT_COLORS = { + "gimp": "\033[38;5;214m", # warm orange + "blender": "\033[38;5;208m", # deep orange + "inkscape": "\033[38;5;39m", # bright blue + "audacity": "\033[38;5;33m", # navy blue + "libreoffice": "\033[38;5;40m", # green + "obs_studio": "\033[38;5;55m", # purple + "kdenlive": "\033[38;5;69m", # slate blue + "shotcut": "\033[38;5;35m", # teal green +} +_DEFAULT_ACCENT = "\033[38;5;75m" # default sky blue + +# Status colors +_GREEN = "\033[38;5;78m" +_YELLOW = "\033[38;5;220m" +_RED = "\033[38;5;196m" +_BLUE = "\033[38;5;75m" +_MAGENTA = "\033[38;5;176m" + +# โ”€โ”€ Brand icon โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +# The cli-anything icon: a small colored diamond/chevron mark 
+_ICON = f"{_CYAN}{_BOLD}โ—†{_RESET}" +_ICON_SMALL = f"{_CYAN}โ–ธ{_RESET}" + +# โ”€โ”€ Box drawing characters โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +_H_LINE = "โ”€" +_V_LINE = "โ”‚" +_TL = "โ•ญ" +_TR = "โ•ฎ" +_BL = "โ•ฐ" +_BR = "โ•ฏ" +_T_DOWN = "โ”ฌ" +_T_UP = "โ”ด" +_T_RIGHT = "โ”œ" +_T_LEFT = "โ”ค" +_CROSS = "โ”ผ" + + +def _strip_ansi(text: str) -> str: + """Remove ANSI escape codes for length calculation.""" + import re + return re.sub(r"\033\[[^m]*m", "", text) + + +def _visible_len(text: str) -> int: + """Get visible length of text (excluding ANSI codes).""" + return len(_strip_ansi(text)) + + +class ReplSkin: + """Unified REPL skin for cli-anything CLIs. + + Provides consistent branding, prompts, and message formatting + across all CLI harnesses built with the cli-anything methodology. + """ + + def __init__(self, software: str, version: str = "1.0.0", + history_file: str | None = None, skill_path: str | None = None): + """Initialize the REPL skin. + + Args: + software: Software name (e.g., "gimp", "shotcut", "blender"). + version: CLI version string. + history_file: Path for persistent command history. + Defaults to ~/.cli-anything-/history + skill_path: Path to the SKILL.md file for agent discovery. + Auto-detected from the package's skills/ directory if not provided. + Displayed in banner for AI agents to know where to read skill info. 
+ """ + self.software = software.lower().replace("-", "_") + self.display_name = software.replace("_", " ").title() + self.version = version + + # Auto-detect skill path from package layout: + # cli_anything//utils/repl_skin.py (this file) + # cli_anything//skills/SKILL.md (target) + if skill_path is None: + from pathlib import Path + _auto = Path(__file__).resolve().parent.parent / "skills" / "SKILL.md" + if _auto.is_file(): + skill_path = str(_auto) + self.skill_path = skill_path + self.accent = _ACCENT_COLORS.get(self.software, _DEFAULT_ACCENT) + + # History file + if history_file is None: + from pathlib import Path + hist_dir = Path.home() / f".cli-anything-{self.software}" + hist_dir.mkdir(parents=True, exist_ok=True) + self.history_file = str(hist_dir / "history") + else: + self.history_file = history_file + + # Detect terminal capabilities + self._color = self._detect_color_support() + + def _detect_color_support(self) -> bool: + """Check if terminal supports color.""" + if os.environ.get("NO_COLOR"): + return False + if os.environ.get("CLI_ANYTHING_NO_COLOR"): + return False + if not hasattr(sys.stdout, "isatty"): + return False + return sys.stdout.isatty() + + def _c(self, code: str, text: str) -> str: + """Apply color code if colors are supported.""" + if not self._color: + return text + return f"{code}{text}{_RESET}" + + # โ”€โ”€ Banner โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def print_banner(self): + """Print the startup banner with branding.""" + inner = 54 + + def _box_line(content: str) -> str: + """Wrap content in box drawing, padding to inner width.""" + pad = inner - _visible_len(content) + vl = self._c(_DARK_GRAY, _V_LINE) + return f"{vl}{content}{' ' * max(0, pad)}{vl}" + + top = self._c(_DARK_GRAY, f"{_TL}{_H_LINE * inner}{_TR}") + bot = self._c(_DARK_GRAY, f"{_BL}{_H_LINE * inner}{_BR}") + + # Title: โ—† 
cli-anything ยท Shotcut + icon = self._c(_CYAN + _BOLD, "โ—†") + brand = self._c(_CYAN + _BOLD, "cli-anything") + dot = self._c(_DARK_GRAY, "ยท") + name = self._c(self.accent + _BOLD, self.display_name) + title = f" {icon} {brand} {dot} {name}" + + ver = f" {self._c(_DARK_GRAY, f' v{self.version}')}" + tip = f" {self._c(_DARK_GRAY, ' Type help for commands, quit to exit')}" + empty = "" + + # Skill path for agent discovery + skill_line = None + if self.skill_path: + skill_icon = self._c(_MAGENTA, "โ—‡") + skill_label = self._c(_DARK_GRAY, " Skill:") + skill_path_display = self._c(_LIGHT_GRAY, self.skill_path) + skill_line = f" {skill_icon} {skill_label} {skill_path_display}" + + print(top) + print(_box_line(title)) + print(_box_line(ver)) + if skill_line: + print(_box_line(skill_line)) + print(_box_line(empty)) + print(_box_line(tip)) + print(bot) + print() + + # โ”€โ”€ Prompt โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def prompt(self, project_name: str = "", modified: bool = False, + context: str = "") -> str: + """Build a styled prompt string for prompt_toolkit or input(). + + Args: + project_name: Current project name (empty if none open). + modified: Whether the project has unsaved changes. + context: Optional extra context to show in prompt. + + Returns: + Formatted prompt string. 
+ """ + parts = [] + + # Icon + if self._color: + parts.append(f"{_CYAN}โ—†{_RESET} ") + else: + parts.append("> ") + + # Software name + parts.append(self._c(self.accent + _BOLD, self.software)) + + # Project context + if project_name or context: + ctx = context or project_name + mod = "*" if modified else "" + parts.append(f" {self._c(_DARK_GRAY, '[')}") + parts.append(self._c(_LIGHT_GRAY, f"{ctx}{mod}")) + parts.append(self._c(_DARK_GRAY, ']')) + + parts.append(self._c(_GRAY, " โฏ ")) + + return "".join(parts) + + def prompt_tokens(self, project_name: str = "", modified: bool = False, + context: str = ""): + """Build prompt_toolkit formatted text tokens for the prompt. + + Use with prompt_toolkit's FormattedText for proper ANSI handling. + + Returns: + list of (style, text) tuples for prompt_toolkit. + """ + accent_hex = _ANSI_256_TO_HEX.get(self.accent, "#5fafff") + tokens = [] + + tokens.append(("class:icon", "โ—† ")) + tokens.append(("class:software", self.software)) + + if project_name or context: + ctx = context or project_name + mod = "*" if modified else "" + tokens.append(("class:bracket", " [")) + tokens.append(("class:context", f"{ctx}{mod}")) + tokens.append(("class:bracket", "]")) + + tokens.append(("class:arrow", " โฏ ")) + + return tokens + + def get_prompt_style(self): + """Get a prompt_toolkit Style object matching the skin. 
+ + Returns: + prompt_toolkit.styles.Style + """ + try: + from prompt_toolkit.styles import Style + except ImportError: + return None + + accent_hex = _ANSI_256_TO_HEX.get(self.accent, "#5fafff") + + return Style.from_dict({ + "icon": "#5fdfdf bold", # cyan brand color + "software": f"{accent_hex} bold", + "bracket": "#585858", + "context": "#bcbcbc", + "arrow": "#808080", + # Completion menu + "completion-menu.completion": "bg:#303030 #bcbcbc", + "completion-menu.completion.current": f"bg:{accent_hex} #000000", + "completion-menu.meta.completion": "bg:#303030 #808080", + "completion-menu.meta.completion.current": f"bg:{accent_hex} #000000", + # Auto-suggest + "auto-suggest": "#585858", + # Bottom toolbar + "bottom-toolbar": "bg:#1c1c1c #808080", + "bottom-toolbar.text": "#808080", + }) + + # โ”€โ”€ Messages โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def success(self, message: str): + """Print a success message with green checkmark.""" + icon = self._c(_GREEN + _BOLD, "โœ“") + print(f" {icon} {self._c(_GREEN, message)}") + + def error(self, message: str): + """Print an error message with red cross.""" + icon = self._c(_RED + _BOLD, "โœ—") + print(f" {icon} {self._c(_RED, message)}", file=sys.stderr) + + def warning(self, message: str): + """Print a warning message with yellow triangle.""" + icon = self._c(_YELLOW + _BOLD, "โš ") + print(f" {icon} {self._c(_YELLOW, message)}") + + def info(self, message: str): + """Print an info message with blue dot.""" + icon = self._c(_BLUE, "โ—") + print(f" {icon} {self._c(_LIGHT_GRAY, message)}") + + def hint(self, message: str): + """Print a subtle hint message.""" + print(f" {self._c(_DARK_GRAY, message)}") + + def section(self, title: str): + """Print a section header.""" + print() + print(f" {self._c(self.accent + _BOLD, title)}") + print(f" {self._c(_DARK_GRAY, _H_LINE * len(title))}") + + # โ”€โ”€ 
Status display โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def status(self, label: str, value: str): + """Print a key-value status line.""" + lbl = self._c(_GRAY, f" {label}:") + val = self._c(_WHITE, f" {value}") + print(f"{lbl}{val}") + + def status_block(self, items: dict[str, str], title: str = ""): + """Print a block of status key-value pairs. + + Args: + items: Dict of label -> value pairs. + title: Optional title for the block. + """ + if title: + self.section(title) + + max_key = max(len(k) for k in items) if items else 0 + for label, value in items.items(): + lbl = self._c(_GRAY, f" {label:<{max_key}}") + val = self._c(_WHITE, f" {value}") + print(f"{lbl}{val}") + + def progress(self, current: int, total: int, label: str = ""): + """Print a simple progress indicator. + + Args: + current: Current step number. + total: Total number of steps. + label: Optional label for the progress. + """ + pct = int(current / total * 100) if total > 0 else 0 + bar_width = 20 + filled = int(bar_width * current / total) if total > 0 else 0 + bar = "โ–ˆ" * filled + "โ–‘" * (bar_width - filled) + text = f" {self._c(_CYAN, bar)} {self._c(_GRAY, f'{pct:3d}%')}" + if label: + text += f" {self._c(_LIGHT_GRAY, label)}" + print(text) + + # โ”€โ”€ Table display โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def table(self, headers: list[str], rows: list[list[str]], + max_col_width: int = 40): + """Print a formatted table with box-drawing characters. + + Args: + headers: Column header strings. + rows: List of rows, each a list of cell strings. + max_col_width: Maximum column width before truncation. 
+ """ + if not headers: + return + + # Calculate column widths + col_widths = [min(len(h), max_col_width) for h in headers] + for row in rows: + for i, cell in enumerate(row): + if i < len(col_widths): + col_widths[i] = min( + max(col_widths[i], len(str(cell))), max_col_width + ) + + def pad(text: str, width: int) -> str: + t = str(text)[:width] + return t + " " * (width - len(t)) + + # Header + header_cells = [ + self._c(_CYAN + _BOLD, pad(h, col_widths[i])) + for i, h in enumerate(headers) + ] + sep = self._c(_DARK_GRAY, f" {_V_LINE} ") + header_line = f" {sep.join(header_cells)}" + print(header_line) + + # Separator + sep_parts = [self._c(_DARK_GRAY, _H_LINE * w) for w in col_widths] + sep_line = self._c(_DARK_GRAY, f" {'โ”€โ”€โ”€'.join([_H_LINE * w for w in col_widths])}") + print(sep_line) + + # Rows + for row in rows: + cells = [] + for i, cell in enumerate(row): + if i < len(col_widths): + cells.append(self._c(_LIGHT_GRAY, pad(str(cell), col_widths[i]))) + row_sep = self._c(_DARK_GRAY, f" {_V_LINE} ") + print(f" {row_sep.join(cells)}") + + # โ”€โ”€ Help display โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def help(self, commands: dict[str, str]): + """Print a formatted help listing. + + Args: + commands: Dict of command -> description pairs. 
def create_prompt_session(self):
    """Create a prompt_toolkit PromptSession with skin styling.

    The session uses a file-backed history stored at ``self.history_file``,
    history-based auto-suggestion, history search, and the style returned by
    ``self.get_prompt_style()``.

    Returns:
        A configured PromptSession, or None if prompt_toolkit is unavailable
        (any ImportError raised while building the session).
    """
    try:
        # Imported lazily so the skin still works without prompt_toolkit.
        from prompt_toolkit import PromptSession
        from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
        from prompt_toolkit.history import FileHistory

        style = self.get_prompt_style()

        return PromptSession(
            history=FileHistory(self.history_file),
            auto_suggest=AutoSuggestFromHistory(),
            style=style,
            enable_history_search=True,
        )
    except ImportError:
        return None
def bottom_toolbar(self, items: dict[str, str]):
    """Create a bottom toolbar callback for prompt_toolkit.

    ``items`` is read each time the returned callback runs, so later
    changes to the dict show up in the toolbar.

    Args:
        items: Dict of label -> value pairs to show in toolbar.

    Returns:
        A callable that returns FormattedText for the toolbar.
    """
    def toolbar():
        # Imported lazily: prompt_toolkit is only needed once the toolbar
        # is actually rendered.
        from prompt_toolkit.formatted_text import FormattedText

        parts = []
        for index, (label, value) in enumerate(items.items()):
            if index > 0:
                parts.append(("class:bottom-toolbar.text", " │ "))
            parts.append(("class:bottom-toolbar.text", f" {label}: "))
            # Fragments must be text; coerce so counters and similar values
            # can be passed without pre-formatting.
            parts.append(("class:bottom-toolbar", str(value)))
        return FormattedText(parts)

    return toolbar
@dataclass
class HttpResponse:
    """Minimal HTTP response container returned by :func:`request`."""

    status: int  # numeric HTTP status code
    headers: dict[str, str]  # response headers flattened into a plain dict
    body: str  # body decoded as UTF-8, undecodable bytes replaced

    def json(self) -> Any:
        """Parse ``body`` as JSON and return the decoded value."""
        return json.loads(self.body)


def _build_url(port: int, path: str, params: Optional[dict[str, Any]] = None) -> str:
    """Build a ``http://127.0.0.1:<port><path>`` URL with an optional query string.

    Args:
        port: TCP port of the local server.
        path: Request path; a leading ``/`` is added when missing.
        params: Query parameters. ``None`` values are skipped, list/tuple
            values are expanded into one ``key=value`` pair per entry, and
            every value is converted with ``str()``.

    Returns:
        The URL, without a ``?`` when no parameter survives filtering.
    """
    if not path.startswith("/"):
        path = "/" + path
    url = f"http://127.0.0.1:{port}{path}"
    if params:
        pairs: list[tuple[str, str]] = []
        for key, value in params.items():
            if value is None:
                continue
            if isinstance(value, (list, tuple)):
                pairs.extend((key, str(entry)) for entry in value)
            else:
                pairs.append((key, str(value)))
        if pairs:
            url += "?" + urllib.parse.urlencode(pairs)
    return url


def request(
    port: int,
    path: str,
    *,
    method: str = "GET",
    params: Optional[dict[str, Any]] = None,
    payload: Optional[dict[str, Any]] = None,
    data: bytes | str | None = None,
    headers: Optional[dict[str, str]] = None,
    timeout: float = 5,
) -> HttpResponse:
    """Send one HTTP request to the server on ``127.0.0.1:<port>``.

    Args:
        port: TCP port of the local server.
        path: Request path (see :func:`_build_url`).
        method: HTTP method.
        params: Query parameters (see :func:`_build_url`).
        payload: Object sent as a JSON body; sets ``Content-Type:
            application/json`` unless ``headers`` already provides one.
        data: Raw body; ``str`` is encoded as UTF-8. Mutually exclusive
            with ``payload``.
        headers: Extra request headers, merged over ``Accept: */*``.
        timeout: Socket timeout in seconds.

    Returns:
        An :class:`HttpResponse`. HTTP error statuses (4xx/5xx) are returned
        as responses, not raised.

    Raises:
        ValueError: If both ``payload`` and ``data`` are given.
        RuntimeError: If the request fails at the transport level
            (connection refused, timeout, connection dropped, malformed
            response).
    """
    # Local import: only needed for the exception type handled below.
    import http.client

    if payload is not None and data is not None:
        raise ValueError("payload and data are mutually exclusive")

    request_headers = {"Accept": "*/*"}
    if headers:
        request_headers.update(headers)

    body_data: bytes | None = None
    if payload is not None:
        request_headers.setdefault("Content-Type", "application/json")
        body_data = json.dumps(payload).encode("utf-8")
    elif data is not None:
        body_data = data.encode("utf-8") if isinstance(data, str) else data

    req = urllib.request.Request(
        _build_url(port, path, params=params),
        data=body_data,
        headers=request_headers,
        method=method,
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            body = response.read().decode("utf-8", errors="replace")
            return HttpResponse(response.getcode(), dict(response.headers.items()), body)
    except urllib.error.HTTPError as exc:
        # Must come first: HTTPError is a subclass of URLError/OSError, and
        # error statuses are reported to the caller as ordinary responses.
        body = exc.read().decode("utf-8", errors="replace")
        return HttpResponse(exc.code, dict(exc.headers.items()), body)
    except (OSError, http.client.HTTPException) as exc:
        # urlopen only wraps failures while *sending* in URLError. A timeout
        # or reset while waiting for / reading the response surfaces as a
        # bare OSError (TimeoutError, ConnectionResetError) or an
        # http.client exception. Normalise all of them so callers that
        # catch RuntimeError see every transport failure.
        raise RuntimeError(f"HTTP request failed for {path}: {exc}") from exc
def wait_for_endpoint(
    port: int,
    path: str,
    *,
    timeout: int = 30,
    poll_interval: float = 0.5,
    headers: Optional[dict[str, str]] = None,
    ready_statuses: tuple[int, ...] = (200,),
) -> bool:
    """Poll a local endpoint until it answers with a ready status.

    Args:
        port: TCP port of the local server.
        path: Request path to probe with a GET request.
        timeout: Overall time budget in seconds.
        poll_interval: Pause between probes in seconds.
        headers: Extra headers sent with every probe.
        ready_statuses: HTTP status codes that count as "ready".

    Returns:
        True as soon as a probe returns one of ``ready_statuses``; False when
        the budget is used up first (including a non-positive ``timeout``,
        in which case no probe is sent).
    """
    # monotonic() is immune to wall-clock adjustments (NTP, DST, manual
    # changes) that could otherwise stretch or cut the wait.
    deadline = time.monotonic() + timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        try:
            # A single probe waits at most 3 s and never longer than what is
            # left of the overall budget.
            response = request(port, path, headers=headers, timeout=min(3.0, remaining))
        except RuntimeError:
            # Server not reachable yet - keep polling.
            pass
        else:
            if response.status in ready_statuses:
                return True
        # Do not sleep past the deadline.
        pause = min(poll_interval, deadline - time.monotonic())
        if pause > 0:
            time.sleep(pause)
def candidate_profile_roots(env: Mapping[str, str] | None = None, home: Path | None = None) -> list[Path]:
    """List the directories where a Zotero ``profiles.ini`` may live.

    The candidates are returned in lookup order, without duplicates:
    ``%APPDATA%/Zotero/Zotero`` (only when ``APPDATA`` is set), then the
    Windows, macOS and Linux default locations below ``home``. No candidate
    is checked for existence.

    Args:
        env: Environment mapping to read ``APPDATA`` from. Defaults to
            ``os.environ`` only when omitted; an explicitly passed empty
            mapping is respected, so callers can isolate the lookup from
            the real process environment.
        home: Home directory to derive the defaults from. Defaults to
            ``Path.home()``.

    Returns:
        Candidate profile root directories.
    """
    if env is None:
        env = os.environ
    if home is None:
        home = Path.home()

    raw_candidates: list[Path] = []
    appdata = env.get("APPDATA")
    if appdata:
        raw_candidates.append(Path(appdata) / "Zotero" / "Zotero")
    raw_candidates.extend(
        [
            home / "AppData" / "Roaming" / "Zotero" / "Zotero",
            home / "Library" / "Application Support" / "Zotero",
            home / ".zotero" / "zotero",
        ]
    )

    candidates: list[Path] = []
    for raw in raw_candidates:
        candidate = raw.expanduser()
        # Keep first-seen order; APPDATA often equals the Windows default.
        if candidate not in candidates:
            candidates.append(candidate)
    return candidates
def _read_pref_file(path: Path) -> str:
    """Return the text of ``path``, or ``""`` when the file does not exist.

    The file is decoded as UTF-8 with an optional byte-order mark stripped;
    files that are not valid UTF-8 are decoded as Latin-1, which accepts any
    byte sequence.
    """
    if not path.exists():
        return ""
    try:
        # "utf-8-sig" reads plain UTF-8 as well and additionally drops a
        # leading BOM, so line-anchored matching on the text is not thrown
        # off by an invisible first character.
        return path.read_text(encoding="utf-8-sig")
    except UnicodeDecodeError:
        return path.read_text(encoding="latin-1")


def _decode_pref_string(raw: str) -> str:
    """Undo the ``\\\\`` and ``\\"`` escapes of a quoted preference value."""
    return raw.replace("\\\\", "\\").replace('\\"', '"')


def read_pref(profile_dir: Path | None, pref_name: str) -> Optional[str]:
    """Look up a ``user_pref("<name>", <value>);`` entry of a profile.

    ``user.js`` is consulted before ``prefs.js``. Within one file the last
    matching line wins, so a value appended later overrides an earlier one
    for the same preference. Lines that start with ``//`` are ignored.

    Args:
        profile_dir: Profile directory containing the preference files, or
            None.
        pref_name: Exact preference name.

    Returns:
        ``"true"``/``"false"`` for booleans, the unescaped text of a quoted
        string, the raw token for anything else (e.g. numbers), or None when
        ``profile_dir`` is None or the preference is not set in either file.
    """
    if profile_dir is None:
        return None
    pattern = re.compile(rf'user_pref\("{re.escape(pref_name)}",\s*(.+?)\);')
    for filename in ("user.js", "prefs.js"):
        raw: Optional[str] = None
        for line in _read_pref_file(profile_dir / filename).splitlines():
            if line.lstrip().startswith("//"):
                continue  # commented-out preference
            match = pattern.search(line)
            if match:
                raw = match.group(1).strip()  # keep scanning: last one wins
        if raw is None:
            continue
        if raw in {"true", "false"}:
            return raw
        if raw.startswith('"') and raw.endswith('"'):
            return _decode_pref_string(raw[1:-1])
        return raw
    return None
def find_executable(explicit_executable: str | None = None, env: Mapping[str, str] | None = None) -> Optional[Path]:
    """Locate the Zotero executable.

    Lookup order: the explicit argument, the ``ZOTERO_EXECUTABLE`` entry of
    ``env``, ``zotero``/``zotero.exe`` on ``PATH``, then a list of well-known
    install locations.

    Args:
        explicit_executable: Path given by the caller; returned (with ``~``
            expanded) without checking that it exists.
        env: Environment mapping to read ``ZOTERO_EXECUTABLE`` from.
            Defaults to ``os.environ`` only when omitted, so an explicitly
            passed empty mapping is respected.

    Returns:
        The first match. When nothing is found, the first well-known
        location is returned even though it does not exist, so callers
        should check ``exists()`` on the result.
    """
    if env is None:
        env = os.environ

    if explicit_executable:
        return Path(explicit_executable).expanduser()

    env_executable = env.get("ZOTERO_EXECUTABLE", "").strip()
    if env_executable:
        return Path(env_executable).expanduser()

    for name in ("zotero", "zotero.exe"):
        found = shutil.which(name)
        if found:
            return Path(found)

    candidates = [
        Path(r"C:\Program Files\Zotero\zotero.exe"),
        Path(r"C:\Program Files (x86)\Zotero\zotero.exe"),
        Path("/Applications/Zotero.app/Contents/MacOS/zotero"),
        Path("/usr/lib/zotero/zotero"),
        Path("/usr/local/bin/zotero"),
    ]
    return next((candidate for candidate in candidates if candidate.exists()), candidates[0])
def get_http_port(profile_dir: Path | None, env: Mapping[str, str] | None = None) -> int:
    """Return the port of Zotero's local HTTP server.

    Lookup order: the ``ZOTERO_HTTP_PORT`` entry of ``env``, the
    ``extensions.zotero.httpServer.port`` preference of the profile, then
    the default 23119. A value that is not an integer in 1..65535 is
    ignored and the next source is tried.

    Args:
        profile_dir: Profile directory to read the preference from, or None.
        env: Environment mapping. Defaults to ``os.environ`` only when
            omitted, so an explicitly passed empty mapping is respected.

    Returns:
        The TCP port number.
    """
    if env is None:
        env = os.environ

    def parse_port(raw: Optional[str]) -> Optional[int]:
        """Return ``raw`` as a valid TCP port, or None."""
        if not raw:
            return None
        try:
            port = int(raw)
        except ValueError:
            return None
        return port if 0 < port < 65536 else None

    port = parse_port(env.get("ZOTERO_HTTP_PORT", "").strip())
    if port is None:
        port = parse_port(read_pref(profile_dir, HTTP_PORT_PREF))
    return 23119 if port is None else port
def connect_readonly(sqlite_path: Path | str) -> sqlite3.Connection:
    """Open the Zotero database through a read-only SQLite URI.

    The connection is opened with ``mode=ro&immutable=1`` and returns
    ``sqlite3.Row`` rows, so columns can be read by name.

    Args:
        sqlite_path: Path to the SQLite file.

    Returns:
        An open connection; the caller is responsible for closing it.

    Raises:
        FileNotFoundError: If the database file does not exist.
    """
    path = Path(sqlite_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"Zotero database not found: {path}")
    # as_uri() percent-encodes characters that are special in a URI
    # ("#", "?", "%", spaces) and emits the file:///C:/... form on Windows.
    # Pasting the raw path into the URI lets "#" or "?" cut the file name
    # short, so SQLite opens - or even creates - a different file.
    uri = f"{path.as_uri()}?mode=ro&immutable=1"
    connection = sqlite3.connect(uri, uri=True, timeout=1.0)
    connection.row_factory = sqlite3.Row
    return connection
def note_html_to_text(note_html: str | None) -> str:
    """Convert the HTML of a note into plain text.

    Line breaks and closing block tags become newlines, every remaining tag
    is removed, HTML entities are unescaped, line endings are normalised to
    ``\\n``, runs of three or more newlines collapse to one blank line, and
    the result is stripped.

    Args:
        note_html: Note HTML, or None.

    Returns:
        The plain text; ``""`` for None or empty input.
    """
    if not note_html:
        return ""
    # NOTE(review): in the copy of this file under review the three tag
    # patterns below had been reduced to a bare "(?i)", which matches the
    # empty string and inserts a newline between every character. They are
    # reconstructed here from the replacement strings - confirm the exact
    # tag set against the intended behaviour.
    text = re.sub(r"(?i)<br\s*/?>", "\n", note_html)
    text = re.sub(r"(?i)</p\s*>", "\n\n", text)
    text = re.sub(r"(?i)</(?:div|li|h[1-6])\s*>", "\n", text)
    # Drop every remaining tag. The pattern is written inline so the
    # function does not depend on a module-level compiled regex; ``re``
    # caches compiled patterns.
    text = re.sub(r"<[^>]+>", "", text)
    text = html.unescape(text)
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
def default_library_id(sqlite_path: Path | str) -> Optional[int]:
    """Pick the library to use when the caller did not name one.

    Args:
        sqlite_path: Path to the Zotero SQLite database.

    Returns:
        The ID of the first library whose type is ``"user"``; otherwise the
        ID of the first library returned by ``fetch_libraries``; None when
        the database has no libraries.
    """
    libraries = fetch_libraries(sqlite_path)
    if not libraries:
        return None
    # Prefer a user library; fall back to the first row.
    chosen = next(
        (library for library in libraries if library["type"] == "user"),
        libraries[0],
    )
    return int(chosen["libraryID"])
+ GROUP BY c.collectionID, c.key, c.collectionName, c.parentCollectionID, c.libraryID, c.version + ORDER BY c.collectionName COLLATE NOCASE + """, + (library_id, library_id), + ).fetchall() + return _as_dicts(rows) + + +def find_collections(sqlite_path: Path | str, query: str, *, library_id: int | None = None, limit: int = 20) -> list[dict[str, Any]]: + query = query.strip() + if not query: + return [] + needle = query.lower() + like_query = f"%{needle}%" + prefix_query = f"{needle}%" + with closing(connect_readonly(sqlite_path)) as conn: + rows = conn.execute( + """ + SELECT + c.collectionID, + c.key, + c.collectionName, + c.parentCollectionID, + c.libraryID, + c.version, + COUNT(ci.itemID) AS itemCount + FROM collections c + LEFT JOIN collectionItems ci ON ci.collectionID = c.collectionID + WHERE (? IS NULL OR c.libraryID = ?) AND LOWER(c.collectionName) LIKE ? + GROUP BY c.collectionID, c.key, c.collectionName, c.parentCollectionID, c.libraryID, c.version + ORDER BY + CASE + WHEN LOWER(c.collectionName) = ? THEN 0 + WHEN LOWER(c.collectionName) LIKE ? THEN 1 + ELSE 2 + END, + INSTR(LOWER(c.collectionName), ?), + c.collectionName COLLATE NOCASE, + c.collectionID + LIMIT ? 
def build_collection_tree(collections: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Arrange flat collection rows into a parent/child tree.

    Each row is copied into a node with an added ``"children"`` list; the
    input dicts are not modified. A node becomes a root when its
    ``parentCollectionID`` is None or refers to a collection that is not
    part of ``collections``. Input order is kept among roots and among
    siblings.

    Args:
        collections: Rows carrying at least ``collectionID`` and
            ``parentCollectionID``.

    Returns:
        The root nodes.
    """
    nodes: dict[int, dict[str, Any]] = {
        int(entry["collectionID"]): {**entry, "children": []}
        for entry in collections
    }
    top_level: list[dict[str, Any]] = []
    for entry in collections:
        current = nodes[int(entry["collectionID"])]
        parent_ref = entry["parentCollectionID"]
        parent_node = None if parent_ref is None else nodes.get(int(parent_ref))
        # Without a known parent the node is attached at the top level.
        siblings = top_level if parent_node is None else parent_node["children"]
        siblings.append(current)
    return top_level
def _fetch_item_fields(conn: sqlite3.Connection, item_id: int) -> dict[str, Any]:
    """Return the field values stored for one item.

    Joins ``itemData`` with ``fields`` and ``itemDataValues``. Rows are read
    by column name, so ``conn`` must produce name-addressable rows
    (e.g. ``sqlite3.Row``).

    Args:
        conn: Open database connection.
        item_id: ``itemID`` of the item.

    Returns:
        Mapping of field name to value, in case-insensitive field-name
        order; empty when the item has no field data.
    """
    query = """
        SELECT f.fieldName, v.value
        FROM itemData d
        JOIN fields f ON f.fieldID = d.fieldID
        JOIN itemDataValues v ON v.valueID = d.valueID
        WHERE d.itemID = ?
        ORDER BY f.fieldName COLLATE NOCASE
        """
    field_values: dict[str, Any] = {}
    for record in conn.execute(query, (item_id,)).fetchall():
        field_values[record["fieldName"]] = record["value"]
    return field_values
+ ORDER BY t.name COLLATE NOCASE + """, + (item_id,), + ).fetchall() + return _as_dicts(rows) + + +def _base_item_select() -> str: + return """ + SELECT + i.itemID, + i.key, + i.libraryID, + i.itemTypeID, + it.typeName, + i.dateAdded, + i.dateModified, + i.version, + COALESCE( + ( + SELECT v.value + FROM itemData d + JOIN fields f ON f.fieldID = d.fieldID + JOIN itemDataValues v ON v.valueID = d.valueID + WHERE d.itemID = i.itemID AND f.fieldName = 'title' + LIMIT 1 + ), + n.title, + '' + ) AS title, + n.parentItemID AS noteParentItemID, + n.note AS noteContent, + a.parentItemID AS attachmentParentItemID, + an.parentItemID AS annotationParentItemID, + an.text AS annotationText, + an.comment AS annotationComment, + a.linkMode, + a.contentType, + a.path AS attachmentPath + FROM items i + JOIN itemTypes it ON it.itemTypeID = i.itemTypeID + LEFT JOIN itemNotes n ON n.itemID = i.itemID + LEFT JOIN itemAttachments a ON a.itemID = i.itemID + LEFT JOIN itemAnnotations an ON an.itemID = i.itemID + """ + + +def _normalize_item(conn: sqlite3.Connection, row: sqlite3.Row, include_related: bool = False) -> dict[str, Any]: + item = dict(row) + item["fields"] = _fetch_item_fields(conn, int(row["itemID"])) if include_related else {} + item["creators"] = _fetch_item_creators(conn, int(row["itemID"])) if include_related else [] + item["tags"] = _fetch_item_tags(conn, int(row["itemID"])) if include_related else [] + item["isAttachment"] = row["typeName"] == "attachment" + item["isNote"] = row["typeName"] == "note" + item["isAnnotation"] = row["typeName"] == "annotation" + item["parentItemID"] = row["attachmentParentItemID"] or row["noteParentItemID"] or row["annotationParentItemID"] + item["noteText"] = note_html_to_text(row["noteContent"]) + item["notePreview"] = note_preview(row["noteContent"]) + return item + + +def fetch_items( + sqlite_path: Path | str, + *, + library_id: int | None = None, + collection_id: int | None = None, + parent_item_id: int | None = None, + tag: str | 
def find_items_by_title(
    sqlite_path: Path | str,
    query: str,
    *,
    library_id: int | None = None,
    collection_id: int | None = None,
    limit: int = 20,
    exact_title: bool = False,
) -> list[dict[str, Any]]:
    """Search top-level items by title.

    The title is the item's ``title`` field, falling back to the note title and
    then to the empty string. Child items (anything with an attachment, note or
    annotation parent) are excluded.

    Args:
        sqlite_path: Path of the Zotero SQLite database (opened read-only).
        query: Text to look for; surrounding whitespace is ignored.
        library_id: Restrict the search to this library when given.
        collection_id: Restrict the search to this collection when given.
        limit: Maximum number of rows returned.
        exact_title: Require the lower-cased title to equal the lower-cased
            query instead of merely containing it.

    Returns:
        Normalized item dicts (without related fields/creators/tags), ordered by
        exact match first, then prefix match, then position of the match inside
        the title, then most recently modified. An empty or whitespace-only
        query yields ``[]`` without touching the database.

    Note:
        Matching lower-cases the title with SQLite's ``LOWER`` and the query with
        Python's ``str.lower``; SQLite's built-in ``LOWER`` only folds ASCII, so
        non-ASCII upper-case titles may not match.
    """
    query = query.strip()
    if not query:
        return []
    needle = query.lower()
    # The query is user text, not a pattern: neutralise LIKE's wildcards so that
    # e.g. "50%" or "a_b" only match those literal characters. The backslash is
    # escaped first because it is the ESCAPE character itself.
    like_needle = needle.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    title_expr = """
        LOWER(
            COALESCE(
                (
                    SELECT v.value
                    FROM itemData d
                    JOIN fields f ON f.fieldID = d.fieldID
                    JOIN itemDataValues v ON v.valueID = d.valueID
                    WHERE d.itemID = i.itemID AND f.fieldName = 'title'
                    LIMIT 1
                ),
                n.title,
                ''
            )
        )
    """
    where = ["1=1"]
    params: list[Any] = []
    if library_id is not None:
        where.append("i.libraryID = ?")
        params.append(library_id)
    if collection_id is not None:
        where.append("EXISTS (SELECT 1 FROM collectionItems ci WHERE ci.itemID = i.itemID AND ci.collectionID = ?)")
        params.append(collection_id)
    # Only top-level items: children carry a parent in one of the joined tables.
    where.append("COALESCE(a.parentItemID, n.parentItemID, an.parentItemID) IS NULL")
    if exact_title:
        where.append(f"{title_expr} = ?")
        params.append(needle)
    else:
        where.append(f"{title_expr} LIKE ? ESCAPE '\\'")
        params.append(f"%{like_needle}%")
    sql = (
        "SELECT * FROM ("
        + _base_item_select()
        + f"\nWHERE {' AND '.join(where)}\n) AS base\n"
        + """
        ORDER BY
            CASE
                WHEN LOWER(title) = ? THEN 0
                WHEN LOWER(title) LIKE ? ESCAPE '\\' THEN 1
                ELSE 2
            END,
            INSTR(LOWER(title), ?),
            dateModified DESC,
            itemID DESC
        LIMIT ?
        """
    )
    # Ranking parameters: exact match, prefix match (escaped), match position.
    params.extend([needle, f"{like_needle}%", needle, int(limit)])
    with closing(connect_readonly(sqlite_path)) as conn:
        rows = conn.execute(sql, params).fetchall()
        return [_normalize_item(conn, row, include_related=False) for row in rows]
def resolve_attachment_real_path(item: dict[str, Any], data_dir: Path | str) -> Optional[str]:
    """Translate an attachment's stored path into a filesystem path string.

    Handled forms of ``item["attachmentPath"]``:

    * ``storage:<filename>`` - resolved to
      ``<data_dir>/storage/<item key>/<filename>``;
    * ``file://host/share/...`` with a non-localhost host - returned as a
      Windows UNC path (``\\\\host\\share\\...``);
    * ``file:///C:/...`` - returned as a Windows drive path;
    * any other ``file://`` URL - the percent-decoded path (rendered as a
      Windows path when running on Windows);
    * an absolute path - returned unchanged;
    * anything else - resolved relative to ``data_dir``.

    Args:
        item: Item dict; ``attachmentPath`` is read, and ``key`` is required
            for ``storage:`` paths.
        data_dir: Zotero data directory.

    Returns:
        The path as a string, or ``None`` when the item has no attachment path.
        The file is not checked for existence.
    """
    raw_path = item.get("attachmentPath")
    if not raw_path:
        return None
    raw_path = str(raw_path)
    data_dir = Path(data_dir)
    if raw_path.startswith("storage:"):
        filename = raw_path.split(":", 1)[1]
        return str((data_dir / "storage" / item["key"] / filename).resolve())
    if raw_path.startswith("file://"):
        parsed = urlparse(raw_path)
        decoded_path = unquote(parsed.path)
        if parsed.netloc and parsed.netloc.lower() != "localhost":
            # Convert separators before formatting: a backslash inside an
            # f-string replacement field is a SyntaxError before Python 3.12.
            share_path = decoded_path.replace("/", "\\")
            return str(PureWindowsPath(f"\\\\{parsed.netloc}{share_path}"))
        if re.match(r"^/[A-Za-z]:", decoded_path):
            # "/C:/dir/file" -> "C:/dir/file" so it parses as a drive path.
            return str(PureWindowsPath(decoded_path.lstrip("/")))
        return decoded_path if os.name != "nt" else str(PureWindowsPath(decoded_path))
    path = Path(raw_path)
    if path.is_absolute():
        return str(path)
    return str((data_dir / raw_path).resolve())
def fetch_tag_items(sqlite_path: Path | str, tag_ref: str | int, library_id: int | None = None) -> list[dict[str, Any]]:
    """List the top-level items carrying a tag, given the tag's ID or name.

    A numeric reference is first tried as a ``tagID``. If no tag has that ID
    the reference is then tried as a tag name, so tags whose names consist only
    of digits (for example ``"2024"``) remain reachable.

    Args:
        sqlite_path: Path of the Zotero SQLite database (opened read-only).
        tag_ref: Tag ID or tag name.
        library_id: Restrict the items to this library when given.

    Returns:
        Items as produced by ``fetch_items`` for the resolved tag name, or
        ``[]`` when the reference matches no tag.
    """
    with closing(connect_readonly(sqlite_path)) as conn:
        row = None
        if _is_numeric_ref(tag_ref):
            row = conn.execute("SELECT name FROM tags WHERE tagID = ?", (int(tag_ref),)).fetchone()
        if not row:
            # Either a non-numeric reference, or a numeric one that is not a
            # known tagID and may be the literal name of a tag.
            row = conn.execute("SELECT name FROM tags WHERE name = ?", (str(tag_ref),)).fetchone()
    if not row:
        return []
    return fetch_items(sqlite_path, library_id=library_id, tag=row["name"])
def move_item_between_collections_record(
    sqlite_path: Path | str,
    *,
    item_id: int,
    target_collection_id: int,
    source_collection_ids: list[int],
) -> dict[str, Any]:
    """Move an item into a target collection by editing ``collectionItems`` directly.

    A database backup is taken first. Then, inside one ``BEGIN IMMEDIATE``
    transaction, the item is appended to the target collection (unless it is
    already a member) and removed from each listed source collection. A source
    equal to the target is skipped. Any failure rolls the transaction back and
    is re-raised.

    Returns:
        A dict with ``backupPath``, ``itemID``, ``targetCollectionID``,
        ``removedCount`` (rows deleted from sources) and ``addedToTarget``.
    """
    backup_path = backup_database(sqlite_path)
    with closing(connect_writable(sqlite_path)) as conn:
        try:
            conn.execute("BEGIN IMMEDIATE")
            item_key = int(item_id)
            target_key = int(target_collection_id)

            membership = conn.execute(
                "SELECT 1 FROM collectionItems WHERE collectionID = ? AND itemID = ?",
                (target_key, item_key),
            ).fetchone()
            added_to_target = False
            if not membership:
                # Append after the highest existing orderIndex (0 for an empty collection).
                position = conn.execute(
                    "SELECT COALESCE(MAX(orderIndex), -1) + 1 AS nextIndex FROM collectionItems WHERE collectionID = ?",
                    (target_key,),
                ).fetchone()
                next_index = int(position["nextIndex"]) if position else 0
                conn.execute(
                    "INSERT INTO collectionItems (collectionID, itemID, orderIndex) VALUES (?, ?, ?)",
                    (target_key, item_key, next_index),
                )
                added_to_target = True

            removed = 0
            for source_collection_id in source_collection_ids:
                source_key = int(source_collection_id)
                if source_key == target_key:
                    # Never delete the membership that was just ensured.
                    continue
                deletion = conn.execute(
                    "DELETE FROM collectionItems WHERE collectionID = ? AND itemID = ?",
                    (source_key, item_key),
                )
                removed += int(deletion.rowcount)
            conn.commit()
        except Exception:
            conn.rollback()
            raise
    return {
        "backupPath": str(backup_path),
        "itemID": int(item_id),
        "targetCollectionID": int(target_collection_id),
        "removedCount": removed,
        "addedToTarget": added_to_target,
    }
def root_json_output(ctx: click.Context | None) -> bool:
    """Tell whether JSON output was requested on the root command.

    Reads the ``json_output`` entry of the root context's ``obj`` mapping.
    Returns ``False`` when there is no context, no root, or no ``obj``.
    """
    if ctx is not None:
        root = ctx.find_root()
        if root is not None and root.obj is not None:
            return bool(root.obj.get("json_output"))
    return False
def _import_exit_code(payload: dict[str, Any]) -> int:
    """Map an import result payload to the integer the import commands return.

    Returns 1 when ``payload["status"]`` is ``"partial_success"`` and 0 for any
    other (or missing) status.
    """
    if payload.get("status") == "partial_success":
        return 1
    return 0
# `app launch`: start Zotero through discovery.launch_zotero and print its result.
# Documented with comments on purpose: click would show a docstring as help text.
@app.command("launch")
@click.option("--wait-timeout", default=30, show_default=True, type=int)
@click.pass_context
def app_launch(ctx: click.Context, wait_timeout: int) -> int:
    launch_result = discovery.launch_zotero(current_runtime(ctx), wait_timeout=wait_timeout)
    # Clear the cached runtime so the next current_runtime() call rebuilds it
    # now that Zotero may be running.
    root = ctx.find_root()
    root.obj["runtime"] = None
    emit(ctx, launch_result)
    return 0
def _persist_selected_collection(selected: dict[str, Any]) -> dict[str, Any]:
    """Store a selected collection as the session's current library and collection.

    ``selected["libraryID"]`` and ``selected["id"]`` (``None`` when absent) are
    written to the session state, which is saved and then returned.
    """
    session_state = current_session()
    session_state.update(
        {
            "current_library": selected.get("libraryID"),
            "current_collection": selected.get("id"),
        }
    )
    session_mod.save_session_state(session_state)
    return session_state
# `item list`: print up to --limit items for the current session scope.
# Documented with comments on purpose: click would show a docstring as help text.
@item.command("list")
@click.option("--limit", default=20, show_default=True, type=int)
@click.pass_context
def item_list(ctx: click.Context, limit: int) -> int:
    runtime = current_runtime(ctx)
    session_state = current_session()
    listed = catalog.list_items(runtime, session=session_state, limit=limit)
    emit(ctx, listed)
    return 0
# `item export`: render an item in one of rendering.SUPPORTED_EXPORT_FORMATS.
# JSON mode prints the whole payload; text mode prints only its "content".
# Documented with comments on purpose: click would show a docstring as help text.
@item.command("export")
@click.argument("ref", required=False)
@click.option("--format", "fmt", type=click.Choice(list(rendering.SUPPORTED_EXPORT_FORMATS)), required=True)
@click.pass_context
def item_export(ctx: click.Context, ref: str | None, fmt: str) -> int:
    exported = rendering.export_item(current_runtime(ctx), ref, fmt, session=current_session())
    if root_json_output(ctx):
        emit(ctx, exported)
    else:
        emit(ctx, exported["content"])
    return 0
# `item bibliography`: render an item's bibliography entry.
# JSON mode prints the whole payload; text mode prints its "bibliography"
# value, or an empty string when that value is missing or empty.
# Documented with comments on purpose: click would show a docstring as help text.
@item.command("bibliography")
@click.argument("ref", required=False)
@click.option("--style", default=None)
@click.option("--locale", default=None)
@click.option("--linkwrap", is_flag=True)
@click.pass_context
def item_bibliography(ctx: click.Context, ref: str | None, style: str | None, locale: str | None, linkwrap: bool) -> int:
    rendered = rendering.bibliography_item(
        current_runtime(ctx),
        ref,
        style=style,
        locale=locale,
        linkwrap=linkwrap,
        session=current_session(),
    )
    if root_json_output(ctx):
        emit(ctx, rendered)
    else:
        emit(ctx, rendered.get("bibliography") or "")
    return 0
# `item add-to-collection`: add an item to a collection by writing to the
# SQLite database; refused unless --experimental is passed.
# Documented with comments on purpose: click would show a docstring as help text.
@item.command("add-to-collection")
@click.argument("item_ref")
@click.argument("collection_ref")
@click.option("--experimental", "experimental_mode", is_flag=True, help="Acknowledge experimental direct SQLite write mode.")
@click.pass_context
def item_add_to_collection_command(ctx: click.Context, item_ref: str, collection_ref: str, experimental_mode: bool) -> int:
    _require_experimental_flag(experimental_mode, "item add-to-collection")
    outcome = experimental.add_item_to_collection(
        current_runtime(ctx),
        item_ref,
        collection_ref,
        session=current_session(),
    )
    emit(ctx, outcome)
    return 0
# `search items`: print the items returned by catalog.search_items for a
# saved-search reference.
# Documented with comments on purpose: click would show a docstring as help text.
@search.command("items")
@click.argument("ref")
@click.pass_context
def search_items_command(ctx: click.Context, ref: str) -> int:
    runtime = current_runtime(ctx)
    matched = catalog.search_items(runtime, ref, session=current_session())
    emit(ctx, matched)
    return 0
# `import file`: import a bibliography file through imports.import_file and
# print the result payload. The function's return value comes from
# _import_exit_code(payload).
# Documented with comments on purpose: click would show a docstring as help text.
@import_group.command("file")
@click.argument("path")
@click.option("--collection", "collection_ref", default=None, help="Collection ID, key, or treeViewID target.")
@click.option("--tag", "tags", multiple=True, help="Tag to apply after import. Repeatable.")
@click.option("--attachments-manifest", default=None, help="Optional JSON manifest describing attachments for imported records.")
@click.option("--attachment-delay-ms", default=0, show_default=True, type=int, help="Default delay before each URL attachment download.")
@click.option("--attachment-timeout", default=60, show_default=True, type=int, help="Default timeout in seconds for attachment download/upload.")
@click.pass_context
def import_file_command(
    ctx: click.Context,
    path: str,
    collection_ref: str | None,
    tags: tuple[str, ...],
    attachments_manifest: str | None,
    attachment_delay_ms: int,
    attachment_timeout: int,
) -> int:
    runtime = current_runtime(ctx)
    tag_list = list(tags)
    result = imports.import_file(
        runtime,
        path,
        collection_ref=collection_ref,
        tags=tag_list,
        session=current_session(),
        attachments_manifest=attachments_manifest,
        attachment_delay_ms=attachment_delay_ms,
        attachment_timeout=attachment_timeout,
    )
    emit(ctx, result)
    return _import_exit_code(result)
# `note get`: print a note. JSON mode prints the whole payload; text mode
# prints "noteText", falling back to "noteContent", then to an empty string.
# Documented with comments on purpose: click would show a docstring as help text.
@note.command("get")
@click.argument("ref")
@click.pass_context
def note_get_command(ctx: click.Context, ref: str) -> int:
    note_payload = notes.get_note(current_runtime(ctx), ref, session=current_session())
    if root_json_output(ctx):
        emit(ctx, note_payload)
    else:
        emit(ctx, note_payload.get("noteText") or note_payload.get("noteContent") or "")
    return 0
# ``session use-library`` command: normalizes the given library reference,
# stores it as the session's current library, records the command in the
# history, and prints the updated session payload.
# (Kept as ``#`` comments: Click would surface a docstring as ``--help`` text.)
@session.command("use-library")
@click.argument("library_ref")
@click.pass_context
def session_use_library(ctx: click.Context, library_ref: str) -> int:
    session_state = current_session()
    # Normalization happens before anything is written, so a rejected
    # reference leaves the stored session untouched.
    normalized_library = _normalize_session_library(current_runtime(ctx), library_ref)
    session_state["current_library"] = normalized_library
    session_mod.save_session_state(session_state)
    # The history keeps the reference exactly as the user typed it.
    session_mod.append_command_history(f"session use-library {library_ref}")
    emit(ctx, session_mod.build_session_payload(session_state))
    return 0
# ``session history`` command: prints the most recent entries of the session's
# command history as ``{"history": [...]}``.
# (Kept as ``#`` comments: Click would surface a docstring as ``--help`` text.)
@session.command("history")
@click.option("--limit", default=10, show_default=True, type=int)
@click.pass_context
def session_history(ctx: click.Context, limit: int) -> int:
    history = current_session().get("command_history", []) or []
    # Clamp to at least one entry, matching the REPL ``history`` builtin.
    # Without the clamp ``--limit 0`` slices ``[-0:]``, which is the *whole*
    # list, and a negative limit slices from the front instead of the back.
    count = max(1, limit)
    emit(ctx, {"history": history[-count:]})
    return 0
state = current_session() + if cmd in {"exit", "quit"}: + return True, 1 + if cmd == "help": + click.echo(repl_help_text()) + return True, 0 + if cmd == "current-library": + click.echo(f"Current library: {state.get('current_library') or ''}") + return True, 0 + if cmd == "current-collection": + click.echo(f"Current collection: {state.get('current_collection') or ''}") + return True, 0 + if cmd == "current-item": + click.echo(f"Current item: {state.get('current_item') or ''}") + return True, 0 + if cmd == "status": + click.echo(json.dumps(session_mod.build_session_payload(state), ensure_ascii=False, indent=2)) + return True, 0 + if cmd == "history": + limit = 10 + if len(argv) > 1: + try: + limit = max(1, int(argv[1])) + except ValueError: + skin.warning(f"history limit must be an integer: {argv[1]}") + return True, 0 + click.echo(json.dumps({"history": state.get("command_history", [])[-limit:]}, ensure_ascii=False, indent=2)) + return True, 0 + if cmd == "state-path": + click.echo(str(session_mod.session_state_path())) + return True, 0 + if cmd == "use-library" and len(argv) > 1: + library_ref = " ".join(argv[1:]) + try: + state["current_library"] = _normalize_session_library(discovery.build_runtime_context(), library_ref) + except click.ClickException as exc: + skin.error(exc.format_message()) + return True, 0 + session_mod.save_session_state(state) + session_mod.append_command_history(f"use-library {library_ref}") + click.echo(f"Current library: {state['current_library']}") + return True, 0 + if cmd == "use-collection" and len(argv) > 1: + state["current_collection"] = " ".join(argv[1:]) + session_mod.save_session_state(state) + session_mod.append_command_history(f"use-collection {' '.join(argv[1:])}") + click.echo(f"Current collection: {state['current_collection']}") + return True, 0 + if cmd == "use-item" and len(argv) > 1: + state["current_item"] = " ".join(argv[1:]) + session_mod.save_session_state(state) + session_mod.append_command_history(f"use-item {' 
def _supports_fancy_repl_output() -> bool:
    """Return True when stdout can show the REPL's decorative glyphs.

    Fancy output is considered safe only if ``sys.stdout`` is a terminal and
    its encoding can represent a small set of probe glyphs.

    Returns:
        ``False`` when stdout is missing or not a TTY, when the stream's
        encoding cannot encode the probe glyphs, or when the encoding name is
        unknown to Python; ``True`` otherwise.
    """
    stream = sys.stdout
    isatty = getattr(stream, "isatty", None)
    if not callable(isatty) or not isatty():
        return False
    encoding = getattr(stream, "encoding", None) or "utf-8"
    # Probe glyphs (U+25B8, U+2191, U+2299, U+FE5E) are written as escapes so
    # the check cannot be corrupted by the source file being re-encoded; the
    # previous literal was mojibake and probed the wrong characters.
    # NOTE(review): presumably these are the glyphs ReplSkin prints - confirm.
    probe = "\u25b8\u2191\u2299\ufe5e"
    try:
        probe.encode(encoding)
    except (UnicodeEncodeError, LookupError):
        # LookupError: the stream reports an encoding name Python has no codec
        # for; treat it like an encoding that cannot show the glyphs.
        return False
    return True
def dispatch(argv: list[str] | None = None, prog_name: str | None = None) -> int:
    """Run the Click command tree once and return an integer exit status.

    The CLI is invoked with ``standalone_mode=False`` so that Click does not
    call ``sys.exit`` itself; that lets the REPL reuse this function for every
    line the user types.

    Args:
        argv: Arguments to parse. ``None`` means ``sys.argv[1:]``.
        prog_name: Program name shown in usage output. Defaults to
            ``"cli-anything-zotero"``.

    Returns:
        The command's return value (``None`` counts as 0), the exit code
        carried by a Click ``Exit``/``ClickException``, or 1 when the command
        was aborted.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    try:
        result = cli.main(args=args, prog_name=prog_name or "cli-anything-zotero", standalone_mode=False)
    except click.exceptions.Exit as exc:
        return int(exc.exit_code)
    except click.ClickException as exc:
        # Outside standalone mode Click does not print the error for us.
        exc.show()
        return int(exc.exit_code)
    except click.Abort:
        # Click turns Ctrl-C/EOF inside a command into Abort and, outside
        # standalone mode, re-raises it. Report it the way standalone mode
        # does instead of letting it escape into the REPL loop as a traceback.
        click.echo("Aborted!", err=True)
        return 1
    return int(result or 0)
PACKAGE_NAME = "cli-anything-zotero"
PACKAGE_VERSION = "0.1.0"


def _handle_metadata_query(argv: list[str]) -> bool:
    """Answer ``setup.py --name`` / ``setup.py --version`` without setuptools.

    Args:
        argv: The full argument vector, program name included.

    Returns:
        ``True`` if ``argv`` was exactly one of the two metadata queries and
        the answer was printed, ``False`` otherwise.
    """
    answers = {"--name": PACKAGE_NAME, "--version": PACKAGE_VERSION}
    # Only a lone flag after the program name counts as a metadata query.
    if len(argv) == 2 and argv[1] in answers:
        print(answers[argv[1]])
        return True
    return False
+""" + +from __future__ import annotations + +import argparse +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + + +def _format_display_name(name: str) -> str: + return name.replace("_", " ").replace("-", " ").title() + + +@dataclass +class CommandInfo: + name: str + description: str + + +@dataclass +class CommandGroup: + name: str + description: str + commands: list[CommandInfo] = field(default_factory=list) + + +@dataclass +class Example: + title: str + description: str + code: str + + +@dataclass +class SkillMetadata: + skill_name: str + skill_description: str + software_name: str + skill_intro: str + version: str + command_groups: list[CommandGroup] = field(default_factory=list) + examples: list[Example] = field(default_factory=list) + + +def extract_intro_from_readme(content: str) -> str: + lines = content.splitlines() + intro: list[str] = [] + seen_title = False + for line in lines: + stripped = line.strip() + if not stripped: + if seen_title and intro: + break + continue + if stripped.startswith("# "): + seen_title = True + continue + if stripped.startswith("##"): + break + if seen_title: + intro.append(stripped) + return " ".join(intro) or "Agent-native CLI interface." + + +def extract_version_from_setup(setup_path: Path) -> str: + content = setup_path.read_text(encoding="utf-8") + match = re.search(r'PACKAGE_VERSION\s*=\s*["\']([^"\']+)["\']', content) + if match: + return match.group(1) + match = re.search(r'version\s*=\s*["\']([^"\']+)["\']', content) + return match.group(1) if match else "1.0.0" + + +def extract_commands_from_cli(cli_path: Path) -> list[CommandGroup]: + content = cli_path.read_text(encoding="utf-8") + groups: list[CommandGroup] = [] + group_name_by_function: dict[str, str] = {} + + group_pattern = ( + r'@cli\.group(?:\(([^)]*)\))?' + r'(?:\s*@[\w.]+(?:\([^)]*\))?)*' + r'\s*def\s+(\w+)\([^)]*\)' + r'(?:\s*->\s*[^:]+)?' + r':\s*' + r'(?:"""([\s\S]*?)"""|\'\'\'([\s\S]*?)\'\'\')?' 
+ ) + for match in re.finditer(group_pattern, content): + decorator_args = match.group(1) or "" + func_name = match.group(2) + doc = (match.group(3) or match.group(4) or "").strip() + explicit_name = re.search(r'["\']([^"\']+)["\']', decorator_args) + name = explicit_name.group(1) if explicit_name else func_name.replace("_", " ") + display_name = name.replace("-", " ").title() + group_name_by_function[func_name] = display_name + groups.append(CommandGroup(name=display_name, description=doc or f"Commands for {name}.")) + + command_pattern = ( + r'@(\w+)\.command(?:\(([^)]*)\))?' + r'(?:\s*@[\w.]+(?:\([^)]*\))?)*' + r'\s*def\s+(\w+)\([^)]*\)' + r'(?:\s*->\s*[^:]+)?' + r':\s*' + r'(?:"""([\s\S]*?)"""|\'\'\'([\s\S]*?)\'\'\')?' + ) + for match in re.finditer(command_pattern, content): + group_func = match.group(1) + decorator_args = match.group(2) or "" + func_name = match.group(3) + doc = (match.group(4) or match.group(5) or "").strip() + explicit_name = re.search(r'["\']([^"\']+)["\']', decorator_args) + cmd_name = explicit_name.group(1) if explicit_name else func_name.replace("_", "-") + title = group_name_by_function.get(group_func, group_func.replace("_", " ").replace("-", " ").title()) + for group in groups: + if group.name == title: + group.commands.append(CommandInfo(cmd_name, doc or f"Execute `{cmd_name}`.")) + break + return groups + + +def generate_examples(software_name: str) -> list[Example]: + return [ + Example("Runtime Status", "Inspect Zotero paths and backend availability.", f"cli-anything-{software_name} app status --json"), + Example("Read Selected Collection", "Persist the collection selected in the Zotero GUI.", f"cli-anything-{software_name} collection use-selected --json"), + Example("Render Citation", "Render a citation using Zotero's Local API.", f"cli-anything-{software_name} item citation --style apa --locale en-US --json"), + Example("Add Child Note", "Create a child note under an existing Zotero item.", f"cli-anything-{software_name} note 
def extract_cli_metadata(harness_path: str) -> SkillMetadata:
    """Assemble the SKILL.md metadata for the harness rooted at ``harness_path``.

    The software package is the first directory (by name) under
    ``<harness>/cli_anything`` that contains an ``__init__.py``. Its README
    provides the intro, ``<harness>/setup.py`` the version, and
    ``<package>/<package>_cli.py`` the command groups.

    Args:
        harness_path: Path to the harness root directory.

    Returns:
        A populated ``SkillMetadata``.

    Raises:
        FileNotFoundError: If ``cli_anything`` is missing or holds no package
            directory.
    """
    harness_root = Path(harness_path)
    cli_root = harness_root / "cli_anything"
    # ``iterdir`` yields entries in arbitrary order; sorting makes the choice
    # reproducible when more than one package directory is present.
    packages = sorted(
        (path for path in cli_root.iterdir() if path.is_dir() and (path / "__init__.py").exists()),
        key=lambda path: path.name,
    )
    if not packages:
        # Previously surfaced as a bare StopIteration with no message.
        raise FileNotFoundError(f"no package directory with __init__.py found under {cli_root}")
    software_dir = packages[0]
    software_name = software_dir.name
    readme = software_dir / "README.md"
    # Tolerate a missing README: an empty text makes the extractor return
    # its generic fallback sentence.
    readme_text = readme.read_text(encoding="utf-8") if readme.exists() else ""
    intro = extract_intro_from_readme(readme_text)
    version = extract_version_from_setup(harness_root / "setup.py")
    groups = extract_commands_from_cli(software_dir / f"{software_name}_cli.py")
    return SkillMetadata(
        skill_name=f"cli-anything-{software_name}",
        skill_description=f"CLI harness for {_format_display_name(software_name)}.",
        software_name=software_name,
        skill_intro=intro,
        version=version,
        command_groups=groups,
        examples=generate_examples(software_name),
    )
def generate_skill_md(metadata: SkillMetadata, template_path: Optional[str] = None) -> str:
    """Render SKILL.md from a Jinja2 template, falling back to the built-in writer.

    Args:
        metadata: The data to render.
        template_path: Optional template file. Defaults to
            ``templates/SKILL.md.template`` next to this module.

    Returns:
        The rendered Markdown. ``generate_skill_md_simple`` is used instead
        when Jinja2 is not installed or the template file does not exist.
    """
    try:
        from jinja2 import Environment, FileSystemLoader
    except ImportError:
        return generate_skill_md_simple(metadata)

    template = Path(template_path) if template_path else Path(__file__).parent / "templates" / "SKILL.md.template"
    if not template.exists():
        return generate_skill_md_simple(metadata)
    env = Environment(
        loader=FileSystemLoader(str(template.parent)),
        # Drop the newline after each ``{% ... %}`` tag. Without this every
        # ``{% for %}``/``{% endfor %}`` line leaves a blank line behind, which
        # splits the Markdown command tables between rows.
        trim_blocks=True,
        # Keep the template's final newline so the output ends like the
        # plain-Python generator's does.
        keep_trailing_newline=True,
    )
    tpl = env.get_template(template.name)
    # The template receives plain dicts/lists rather than dataclass instances.
    command_groups = [
        {
            "name": group.name,
            "description": group.description,
            "commands": [{"name": cmd.name, "description": cmd.description} for cmd in group.commands],
        }
        for group in metadata.command_groups
    ]
    examples = [{"title": ex.title, "description": ex.description, "code": ex.code} for ex in metadata.examples]
    return tpl.render(
        skill_name=metadata.skill_name,
        skill_description=metadata.skill_description,
        software_name=metadata.software_name,
        skill_intro=metadata.skill_intro,
        version=metadata.version,
        command_groups=command_groups,
        examples=examples,
    )
print(generate_skill_file(args.harness_path, output_path=args.output, template_path=args.template)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/zotero/agent-harness/templates/SKILL.md.template b/zotero/agent-harness/templates/SKILL.md.template new file mode 100644 index 000000000..a5e820398 --- /dev/null +++ b/zotero/agent-harness/templates/SKILL.md.template @@ -0,0 +1,53 @@ +--- +name: >- + {{ skill_name }} +description: >- + {{ skill_description }} +--- + +# {{ skill_name }} + +{{ skill_intro }} + +## Installation + +```bash +pip install -e . +``` + +## Entry Points + +```bash +cli-anything-{{ software_name }} +python -m cli_anything.{{ software_name }} +``` + +## Command Groups + +{% for group in command_groups %} +### {{ group.name }} + +{{ group.description }} + +| Command | Description | +|---------|-------------| +{% for cmd in group.commands %} +| `{{ cmd.name }}` | {{ cmd.description }} | +{% endfor %} + +{% endfor %} +## Examples + +{% for example in examples %} +### {{ example.title }} + +{{ example.description }} + +```bash +{{ example.code }} +``` + +{% endfor %} +## Version + +{{ version }}