From 7472028997b89bfece99f8d8c6c3b9a90ee7a90b Mon Sep 17 00:00:00 2001 From: JOJO <1498581755@qq.com> Date: Sat, 7 Feb 2026 00:20:35 +0800 Subject: [PATCH] feat: add skills framework and controls --- agentskills/docx/LICENSE.txt | 30 + agentskills/docx/SKILL.md | 481 ++ agentskills/docx/scripts/__init__.py | 1 + agentskills/docx/scripts/accept_changes.py | 135 + agentskills/docx/scripts/comment.py | 318 ++ .../docx/scripts/office/helpers/__init__.py | 0 .../docx/scripts/office/helpers/merge_runs.py | 199 + .../office/helpers/simplify_redlines.py | 197 + agentskills/docx/scripts/office/pack.py | 159 + .../schemas/ISO-IEC29500-4_2016/dml-chart.xsd | 1499 ++++++ .../ISO-IEC29500-4_2016/dml-chartDrawing.xsd | 146 + .../ISO-IEC29500-4_2016/dml-diagram.xsd | 1085 ++++ .../ISO-IEC29500-4_2016/dml-lockedCanvas.xsd | 11 + .../schemas/ISO-IEC29500-4_2016/dml-main.xsd | 3081 ++++++++++++ .../ISO-IEC29500-4_2016/dml-picture.xsd | 23 + .../dml-spreadsheetDrawing.xsd | 185 + .../dml-wordprocessingDrawing.xsd | 287 ++ .../schemas/ISO-IEC29500-4_2016/pml.xsd | 1676 +++++++ .../shared-additionalCharacteristics.xsd | 28 + .../shared-bibliography.xsd | 144 + .../shared-commonSimpleTypes.xsd | 174 + .../shared-customXmlDataProperties.xsd | 25 + .../shared-customXmlSchemaProperties.xsd | 18 + .../shared-documentPropertiesCustom.xsd | 59 + .../shared-documentPropertiesExtended.xsd | 56 + .../shared-documentPropertiesVariantTypes.xsd | 195 + .../ISO-IEC29500-4_2016/shared-math.xsd | 582 +++ .../shared-relationshipReference.xsd | 25 + .../schemas/ISO-IEC29500-4_2016/sml.xsd | 4439 +++++++++++++++++ .../schemas/ISO-IEC29500-4_2016/vml-main.xsd | 570 +++ .../ISO-IEC29500-4_2016/vml-officeDrawing.xsd | 509 ++ .../vml-presentationDrawing.xsd | 12 + .../vml-spreadsheetDrawing.xsd | 108 + .../vml-wordprocessingDrawing.xsd | 96 + .../schemas/ISO-IEC29500-4_2016/wml.xsd | 3646 ++++++++++++++ .../schemas/ISO-IEC29500-4_2016/xml.xsd | 116 + .../ecma/fouth-edition/opc-contentTypes.xsd | 42 + 
.../ecma/fouth-edition/opc-coreProperties.xsd | 50 + .../schemas/ecma/fouth-edition/opc-digSig.xsd | 49 + .../ecma/fouth-edition/opc-relationships.xsd | 33 + .../docx/scripts/office/schemas/mce/mc.xsd | 75 + .../office/schemas/microsoft/wml-2010.xsd | 560 +++ .../office/schemas/microsoft/wml-2012.xsd | 67 + .../office/schemas/microsoft/wml-2018.xsd | 14 + .../office/schemas/microsoft/wml-cex-2018.xsd | 20 + .../office/schemas/microsoft/wml-cid-2016.xsd | 13 + .../microsoft/wml-sdtdatahash-2020.xsd | 4 + .../schemas/microsoft/wml-symex-2015.xsd | 8 + agentskills/docx/scripts/office/soffice.py | 183 + agentskills/docx/scripts/office/unpack.py | 132 + agentskills/docx/scripts/office/validate.py | 111 + .../scripts/office/validators/__init__.py | 15 + .../docx/scripts/office/validators/base.py | 847 ++++ .../docx/scripts/office/validators/docx.py | 446 ++ .../docx/scripts/office/validators/pptx.py | 275 + .../scripts/office/validators/redlining.py | 247 + .../docx/scripts/templates/comments.xml | 3 + .../scripts/templates/commentsExtended.xml | 3 + .../scripts/templates/commentsExtensible.xml | 3 + .../docx/scripts/templates/commentsIds.xml | 3 + agentskills/docx/scripts/templates/people.xml | 3 + agentskills/frontend-design/LICENSE.txt | 177 + agentskills/frontend-design/SKILL.md | 42 + agentskills/test/SKILL.md | 15 + config/paths.py | 4 + core/main_terminal.py | 20 +- modules/api_user_manager.py | 3 +- modules/personalization_manager.py | 31 + modules/skills_manager.py | 211 + modules/user_manager.py | 3 +- prompts/skills_system.txt | 4 + server/chat.py | 38 +- server/context.py | 2 + .../personalization/PersonalizationDrawer.vue | 44 +- static/src/stores/personalization.ts | 34 + utils/context_manager.py | 35 + 76 files changed, 24178 insertions(+), 6 deletions(-) create mode 100644 agentskills/docx/LICENSE.txt create mode 100644 agentskills/docx/SKILL.md create mode 100755 agentskills/docx/scripts/__init__.py create mode 100644 
agentskills/docx/scripts/accept_changes.py create mode 100644 agentskills/docx/scripts/comment.py create mode 100644 agentskills/docx/scripts/office/helpers/__init__.py create mode 100644 agentskills/docx/scripts/office/helpers/merge_runs.py create mode 100644 agentskills/docx/scripts/office/helpers/simplify_redlines.py create mode 100755 agentskills/docx/scripts/office/pack.py create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd create mode 100644 
agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd create mode 100644 agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd create mode 100644 agentskills/docx/scripts/office/schemas/mce/mc.xsd create mode 100644 agentskills/docx/scripts/office/schemas/microsoft/wml-2010.xsd create mode 100644 agentskills/docx/scripts/office/schemas/microsoft/wml-2012.xsd create mode 100644 agentskills/docx/scripts/office/schemas/microsoft/wml-2018.xsd create mode 100644 agentskills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd 
create mode 100644 agentskills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd create mode 100644 agentskills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd create mode 100644 agentskills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd create mode 100644 agentskills/docx/scripts/office/soffice.py create mode 100755 agentskills/docx/scripts/office/unpack.py create mode 100755 agentskills/docx/scripts/office/validate.py create mode 100644 agentskills/docx/scripts/office/validators/__init__.py create mode 100644 agentskills/docx/scripts/office/validators/base.py create mode 100644 agentskills/docx/scripts/office/validators/docx.py create mode 100644 agentskills/docx/scripts/office/validators/pptx.py create mode 100644 agentskills/docx/scripts/office/validators/redlining.py create mode 100644 agentskills/docx/scripts/templates/comments.xml create mode 100644 agentskills/docx/scripts/templates/commentsExtended.xml create mode 100644 agentskills/docx/scripts/templates/commentsExtensible.xml create mode 100644 agentskills/docx/scripts/templates/commentsIds.xml create mode 100644 agentskills/docx/scripts/templates/people.xml create mode 100644 agentskills/frontend-design/LICENSE.txt create mode 100644 agentskills/frontend-design/SKILL.md create mode 100644 agentskills/test/SKILL.md create mode 100644 modules/skills_manager.py create mode 100644 prompts/skills_system.txt diff --git a/agentskills/docx/LICENSE.txt b/agentskills/docx/LICENSE.txt new file mode 100644 index 0000000..c55ab42 --- /dev/null +++ b/agentskills/docx/LICENSE.txt @@ -0,0 +1,30 @@ +© 2025 Anthropic, PBC. All rights reserved. + +LICENSE: Use of these materials (including all code, prompts, assets, files, +and other components of this Skill) is governed by your agreement with +Anthropic regarding use of Anthropic's services. 
If no separate agreement +exists, use is governed by Anthropic's Consumer Terms of Service or +Commercial Terms of Service, as applicable: +https://www.anthropic.com/legal/consumer-terms +https://www.anthropic.com/legal/commercial-terms +Your applicable agreement is referred to as the "Agreement." "Services" are +as defined in the Agreement. + +ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the +contrary, users may not: + +- Extract these materials from the Services or retain copies of these + materials outside the Services +- Reproduce or copy these materials, except for temporary copies created + automatically during authorized use of the Services +- Create derivative works based on these materials +- Distribute, sublicense, or transfer these materials to any third party +- Make, offer to sell, sell, or import any inventions embodied in these + materials +- Reverse engineer, decompile, or disassemble these materials + +The receipt, viewing, or possession of these materials does not convey or +imply any license or right beyond those expressly granted above. + +Anthropic retains all right, title, and interest in these materials, +including all copyrights, patents, and other intellectual property rights. diff --git a/agentskills/docx/SKILL.md b/agentskills/docx/SKILL.md new file mode 100644 index 0000000..ad2e175 --- /dev/null +++ b/agentskills/docx/SKILL.md @@ -0,0 +1,481 @@ +--- +name: docx +description: "Use this skill whenever the user wants to create, read, edit, or manipulate Word documents (.docx files). Triggers include: any mention of \"Word doc\", \"word document\", \".docx\", or requests to produce professional documents with formatting like tables of contents, headings, page numbers, or letterheads. 
Also use when extracting or reorganizing content from .docx files, inserting or replacing images in documents, performing find-and-replace in Word files, working with tracked changes or comments, or converting content into a polished Word document. If the user asks for a \"report\", \"memo\", \"letter\", \"template\", or similar deliverable as a Word or .docx file, use this skill. Do NOT use for PDFs, spreadsheets, Google Docs, or general coding tasks unrelated to document generation." +license: Proprietary. LICENSE.txt has complete terms +--- + +# DOCX creation, editing, and analysis + +## Overview + +A .docx file is a ZIP archive containing XML files. + +## Quick Reference + +| Task | Approach | +|------|----------| +| Read/analyze content | `pandoc` or unpack for raw XML | +| Create new document | Use `docx-js` - see Creating New Documents below | +| Edit existing document | Unpack → edit XML → repack - see Editing Existing Documents below | + +### Converting .doc to .docx + +Legacy `.doc` files must be converted before editing: + +```bash +python scripts/office/soffice.py --headless --convert-to docx document.doc +``` + +### Reading Content + +```bash +# Text extraction with tracked changes +pandoc --track-changes=all document.docx -o output.md + +# Raw XML access +python scripts/office/unpack.py document.docx unpacked/ +``` + +### Converting to Images + +```bash +python scripts/office/soffice.py --headless --convert-to pdf document.docx +pdftoppm -jpeg -r 150 document.pdf page +``` + +### Accepting Tracked Changes + +To produce a clean document with all tracked changes accepted (requires LibreOffice): + +```bash +python scripts/accept_changes.py input.docx output.docx +``` + +--- + +## Creating New Documents + +Generate .docx files with JavaScript, then validate. 
Install: `npm install -g docx` + +### Setup +```javascript +const { Document, Packer, Paragraph, TextRun, Table, TableRow, TableCell, ImageRun, + Header, Footer, AlignmentType, PageOrientation, LevelFormat, ExternalHyperlink, + TableOfContents, HeadingLevel, BorderStyle, WidthType, ShadingType, + VerticalAlign, PageNumber, PageBreak } = require('docx'); + +const doc = new Document({ sections: [{ children: [/* content */] }] }); +Packer.toBuffer(doc).then(buffer => fs.writeFileSync("doc.docx", buffer)); +``` + +### Validation +After creating the file, validate it. If validation fails, unpack, fix the XML, and repack. +```bash +python scripts/office/validate.py doc.docx +``` + +### Page Size + +```javascript +// CRITICAL: docx-js defaults to A4, not US Letter +// Always set page size explicitly for consistent results +sections: [{ + properties: { + page: { + size: { + width: 12240, // 8.5 inches in DXA + height: 15840 // 11 inches in DXA + }, + margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } // 1 inch margins + } + }, + children: [/* content */] +}] +``` + +**Common page sizes (DXA units, 1440 DXA = 1 inch):** + +| Paper | Width | Height | Content Width (1" margins) | +|-------|-------|--------|---------------------------| +| US Letter | 12,240 | 15,840 | 9,360 | +| A4 (default) | 11,906 | 16,838 | 9,026 | + +**Landscape orientation:** docx-js swaps width/height internally, so pass portrait dimensions and let it handle the swap: +```javascript +size: { + width: 12240, // Pass SHORT edge as width + height: 15840, // Pass LONG edge as height + orientation: PageOrientation.LANDSCAPE // docx-js swaps them in the XML +}, +// Content width = 15840 - left margin - right margin (uses the long edge) +``` + +### Styles (Override Built-in Headings) + +Use Arial as the default font (universally supported). Keep titles black for readability. 
+ +```javascript +const doc = new Document({ + styles: { + default: { document: { run: { font: "Arial", size: 24 } } }, // 12pt default + paragraphStyles: [ + // IMPORTANT: Use exact IDs to override built-in styles + { id: "Heading1", name: "Heading 1", basedOn: "Normal", next: "Normal", quickFormat: true, + run: { size: 32, bold: true, font: "Arial" }, + paragraph: { spacing: { before: 240, after: 240 }, outlineLevel: 0 } }, // outlineLevel required for TOC + { id: "Heading2", name: "Heading 2", basedOn: "Normal", next: "Normal", quickFormat: true, + run: { size: 28, bold: true, font: "Arial" }, + paragraph: { spacing: { before: 180, after: 180 }, outlineLevel: 1 } }, + ] + }, + sections: [{ + children: [ + new Paragraph({ heading: HeadingLevel.HEADING_1, children: [new TextRun("Title")] }), + ] + }] +}); +``` + +### Lists (NEVER use unicode bullets) + +```javascript +// ❌ WRONG - never manually insert bullet characters +new Paragraph({ children: [new TextRun("• Item")] }) // BAD +new Paragraph({ children: [new TextRun("\u2022 Item")] }) // BAD + +// ✅ CORRECT - use numbering config with LevelFormat.BULLET +const doc = new Document({ + numbering: { + config: [ + { reference: "bullets", + levels: [{ level: 0, format: LevelFormat.BULLET, text: "•", alignment: AlignmentType.LEFT, + style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] }, + { reference: "numbers", + levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1.", alignment: AlignmentType.LEFT, + style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] }, + ] + }, + sections: [{ + children: [ + new Paragraph({ numbering: { reference: "bullets", level: 0 }, + children: [new TextRun("Bullet item")] }), + new Paragraph({ numbering: { reference: "numbers", level: 0 }, + children: [new TextRun("Numbered item")] }), + ] + }] +}); + +// ⚠️ Each reference creates INDEPENDENT numbering +// Same reference = continues (1,2,3 then 4,5,6) +// Different reference = restarts (1,2,3 then 1,2,3) +``` + 
+### Tables + +**CRITICAL: Tables need dual widths** - set both `columnWidths` on the table AND `width` on each cell. Without both, tables render incorrectly on some platforms. + +```javascript +// CRITICAL: Always set table width for consistent rendering +// CRITICAL: Use ShadingType.CLEAR (not SOLID) to prevent black backgrounds +const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" }; +const borders = { top: border, bottom: border, left: border, right: border }; + +new Table({ + width: { size: 9360, type: WidthType.DXA }, // Always use DXA (percentages break in Google Docs) + columnWidths: [4680, 4680], // Must sum to table width (DXA: 1440 = 1 inch) + rows: [ + new TableRow({ + children: [ + new TableCell({ + borders, + width: { size: 4680, type: WidthType.DXA }, // Also set on each cell + shading: { fill: "D5E8F0", type: ShadingType.CLEAR }, // CLEAR not SOLID + margins: { top: 80, bottom: 80, left: 120, right: 120 }, // Cell padding (internal, not added to width) + children: [new Paragraph({ children: [new TextRun("Cell")] })] + }) + ] + }) + ] +}) +``` + +**Table width calculation:** + +Always use `WidthType.DXA` — `WidthType.PERCENTAGE` breaks in Google Docs. 
+ +```javascript +// Table width = sum of columnWidths = content width +// US Letter with 1" margins: 12240 - 2880 = 9360 DXA +width: { size: 9360, type: WidthType.DXA }, +columnWidths: [7000, 2360] // Must sum to table width +``` + +**Width rules:** +- **Always use `WidthType.DXA`** — never `WidthType.PERCENTAGE` (incompatible with Google Docs) +- Table width must equal the sum of `columnWidths` +- Cell `width` must match corresponding `columnWidth` +- Cell `margins` are internal padding - they reduce content area, not add to cell width +- For full-width tables: use content width (page width minus left and right margins) + +### Images + +```javascript +// CRITICAL: type parameter is REQUIRED +new Paragraph({ + children: [new ImageRun({ + type: "png", // Required: png, jpg, jpeg, gif, bmp, svg + data: fs.readFileSync("image.png"), + transformation: { width: 200, height: 150 }, + altText: { title: "Title", description: "Desc", name: "Name" } // All three required + })] +}) +``` + +### Page Breaks + +```javascript +// CRITICAL: PageBreak must be inside a Paragraph +new Paragraph({ children: [new PageBreak()] }) + +// Or use pageBreakBefore +new Paragraph({ pageBreakBefore: true, children: [new TextRun("New page")] }) +``` + +### Table of Contents + +```javascript +// CRITICAL: Headings must use HeadingLevel ONLY - no custom styles +new TableOfContents("Table of Contents", { hyperlink: true, headingStyleRange: "1-3" }) +``` + +### Headers/Footers + +```javascript +sections: [{ + properties: { + page: { margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } } // 1440 = 1 inch + }, + headers: { + default: new Header({ children: [new Paragraph({ children: [new TextRun("Header")] })] }) + }, + footers: { + default: new Footer({ children: [new Paragraph({ + children: [new TextRun("Page "), new TextRun({ children: [PageNumber.CURRENT] })] + })] }) + }, + children: [/* content */] +}] +``` + +### Critical Rules for docx-js + +- **Set page size explicitly** - docx-js 
defaults to A4; use US Letter (12240 x 15840 DXA) for US documents +- **Landscape: pass portrait dimensions** - docx-js swaps width/height internally; pass short edge as `width`, long edge as `height`, and set `orientation: PageOrientation.LANDSCAPE` +- **Never use `\n`** - use separate Paragraph elements +- **Never use unicode bullets** - use `LevelFormat.BULLET` with numbering config +- **PageBreak must be in Paragraph** - standalone creates invalid XML +- **ImageRun requires `type`** - always specify png/jpg/etc +- **Always set table `width` with DXA** - never use `WidthType.PERCENTAGE` (breaks in Google Docs) +- **Tables need dual widths** - `columnWidths` array AND cell `width`, both must match +- **Table width = sum of columnWidths** - for DXA, ensure they add up exactly +- **Always add cell margins** - use `margins: { top: 80, bottom: 80, left: 120, right: 120 }` for readable padding +- **Use `ShadingType.CLEAR`** - never SOLID for table shading +- **TOC requires HeadingLevel only** - no custom styles on heading paragraphs +- **Override built-in styles** - use exact IDs: "Heading1", "Heading2", etc. +- **Include `outlineLevel`** - required for TOC (0 for H1, 1 for H2, etc.) + +--- + +## Editing Existing Documents + +**Follow all 3 steps in order.** + +### Step 1: Unpack +```bash +python scripts/office/unpack.py document.docx unpacked/ +``` +Extracts XML, pretty-prints, merges adjacent runs, and converts smart quotes to XML entities (`“` etc.) so they survive editing. Use `--merge-runs false` to skip run merging. + +### Step 2: Edit XML + +Edit files in `unpacked/word/`. See XML Reference below for patterns. + +**Use "Claude" as the author** for tracked changes and comments, unless the user explicitly requests use of a different name. + +**Use the Edit tool directly for string replacement. Do not write Python scripts.** Scripts introduce unnecessary complexity. The Edit tool shows exactly what is being replaced. 
+ +**CRITICAL: Use smart quotes for new content.** When adding text with apostrophes or quotes, use XML entities to produce smart quotes: +```xml + +<w:t>Here&#x2019;s a quote: &#x201C;Hello&#x201D;</w:t> +``` +| Entity | Character | +|--------|-----------| +| `&#x2018;` | ‘ (left single) | +| `&#x2019;` | ’ (right single / apostrophe) | +| `&#x201C;` | “ (left double) | +| `&#x201D;` | ” (right double) | + +**Adding comments:** Use `comment.py` to handle boilerplate across multiple XML files (text must be pre-escaped XML): +```bash +python scripts/comment.py unpacked/ 0 "Comment text with &amp; and &#x2019;" +python scripts/comment.py unpacked/ 1 "Reply text" --parent 0 # reply to comment 0 +python scripts/comment.py unpacked/ 0 "Text" --author "Custom Author" # custom author name +``` +Then add markers to document.xml (see Comments in XML Reference). + +### Step 3: Pack +```bash +python scripts/office/pack.py unpacked/ output.docx --original document.docx +``` +Validates with auto-repair, condenses XML, and creates DOCX. Use `--validate false` to skip. + +**Auto-repair will fix:** +- `durableId` >= 0x7FFFFFFF (regenerates valid ID) +- Missing `xml:space="preserve"` on `<w:t>` with whitespace + +**Auto-repair won't fix:** +- Malformed XML, invalid element nesting, missing relationships, schema violations + +### Common Pitfalls + +- **Replace entire `<w:r>` elements**: When adding tracked changes, replace the whole `<w:r>...</w:r>` block with `<w:del>...</w:del><w:ins>...</w:ins>` as siblings. Don't inject tracked change tags inside a run. +- **Preserve `<w:rPr>` formatting**: Copy the original run's `<w:rPr>` block into your tracked change runs to maintain bold, font size, etc. 
+ +--- + +## XML Reference + +### Schema Compliance + +- **Element order in ``**: ``, ``, ``, ``, ``, `` last +- **Whitespace**: Add `xml:space="preserve"` to `` with leading/trailing spaces +- **RSIDs**: Must be 8-digit hex (e.g., `00AB1234`) + +### Tracked Changes + +**Insertion:** +```xml + + inserted text + +``` + +**Deletion:** +```xml + + deleted text + +``` + +**Inside ``**: Use `` instead of ``, and `` instead of ``. + +**Minimal edits** - only mark what changes: +```xml + +The term is + + 30 + + + 60 + + days. +``` + +**Deleting entire paragraphs/list items** - when removing ALL content from a paragraph, also mark the paragraph mark as deleted so it merges with the next paragraph. Add `` inside ``: +```xml + + + ... + + + + + + Entire paragraph content being deleted... + + +``` +Without the `` in ``, accepting changes leaves an empty paragraph/list item. + +**Rejecting another author's insertion** - nest deletion inside their insertion: +```xml + + + their inserted text + + +``` + +**Restoring another author's deletion** - add insertion after (don't modify their deletion): +```xml + + deleted text + + + deleted text + +``` + +### Comments + +After running `comment.py` (see Step 2), add markers to document.xml. For replies, use `--parent` flag and nest markers inside the parent's. + +**CRITICAL: `` and `` are siblings of ``, never inside ``.** + +```xml + + + + deleted + + more text + + + + + + + text + + + + +``` + +### Images + +1. Add image file to `word/media/` +2. Add relationship to `word/_rels/document.xml.rels`: +```xml + +``` +3. Add content type to `[Content_Types].xml`: +```xml + +``` +4. 
def accept_changes(
    input_file: str,
    output_file: str,
) -> tuple[None, str]:
    """Accept all tracked changes in a DOCX file with headless LibreOffice.

    The input is copied to ``output_file`` first; LibreOffice then opens that
    copy and runs the ``AcceptAllTrackedChanges`` Basic macro, which stores
    the document in place and closes it.

    Args:
        input_file: Path of the source ``.docx`` file.
        output_file: Destination path. Missing parent directories are created.

    Returns:
        ``(None, message)``. Every failure is reported through a message that
        starts with ``"Error:"`` instead of an exception.
    """
    input_path = Path(input_file)
    output_path = Path(output_file)

    if not input_path.exists():
        return None, f"Error: Input file not found: {input_file}"

    if input_path.suffix.lower() != ".docx":
        return None, f"Error: Input file is not a DOCX file: {input_file}"

    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(input_path, output_path)
    except Exception as e:
        return None, f"Error: Failed to copy input file to output location: {e}"

    if not _setup_libreoffice_macro():
        return None, "Error: Failed to setup LibreOffice macro"

    cmd = [
        "soffice",
        "--headless",
        f"-env:UserInstallation=file://{LIBREOFFICE_PROFILE}",
        "--norestore",
        "vnd.sun.star.script:Standard.Module1.AcceptAllTrackedChanges?language=Basic&location=application",
        str(output_path.absolute()),
    ]
    success = (
        None,
        f"Successfully accepted all tracked changes: {input_file} -> {output_file}",
    )

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
            env=get_soffice_env(),
        )
    except subprocess.TimeoutExpired:
        # NOTE(review): a timeout is deliberately reported as success here;
        # presumably soffice can keep running after the macro has already
        # stored and closed the document -- confirm before changing.
        return success
    except OSError as e:
        # Raised when the soffice executable is missing or cannot be started.
        return None, f"Error: Failed to launch LibreOffice (soffice): {e}"

    if result.returncode != 0:
        return None, f"Error: LibreOffice failed: {result.stderr}"

    return success


def _setup_libreoffice_macro() -> bool:
    """Make sure the accept-changes macro exists in the LibreOffice profile.

    Returns:
        True when ``Module1.xba`` already contains the macro or was written
        successfully, False when the macro could not be written.
    """
    macro_dir = Path(MACRO_DIR)
    macro_file = macro_dir / "Module1.xba"

    try:
        if macro_file.exists() and "AcceptAllTrackedChanges" in macro_file.read_text():
            return True
    except (OSError, UnicodeDecodeError) as e:
        # An unreadable macro file is simply rewritten below.
        logger.warning("Existing LibreOffice macro is unreadable, rewriting: %s", e)

    if not macro_dir.exists():
        # Start LibreOffice once so it can create its profile. This step is
        # best-effort: the directory is created manually afterwards anyway.
        try:
            subprocess.run(
                [
                    "soffice",
                    "--headless",
                    f"-env:UserInstallation=file://{LIBREOFFICE_PROFILE}",
                    "--terminate_after_init",
                ],
                capture_output=True,
                timeout=10,
                check=False,
                env=get_soffice_env(),
            )
        except (subprocess.TimeoutExpired, OSError) as e:
            logger.warning("LibreOffice profile initialisation failed: %s", e)

    try:
        macro_dir.mkdir(parents=True, exist_ok=True)
        macro_file.write_text(ACCEPT_CHANGES_MACRO)
        return True
    except Exception as e:
        logger.warning(f"Failed to setup LibreOffice macro: {e}")
        return False


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Accept all tracked changes in a DOCX file"
    )
    parser.add_argument("input_file", help="Input DOCX file with tracked changes")
    parser.add_argument(
        "output_file", help="Output DOCX file (clean, no tracked changes)"
    )
    args = parser.parse_args()

    _, message = accept_changes(args.input_file, args.output_file)
    print(message)

    # Failure messages always start with "Error:". A substring test would
    # also match a success message whose file names contain "Error".
    if message.startswith("Error:"):
        raise SystemExit(1)
+ + + """ + + +def _generate_hex_id() -> str: + return f"{random.randint(0, 0x7FFFFFFE):08X}" + + +SMART_QUOTE_ENTITIES = { + "\u201c": "“", + "\u201d": "”", + "\u2018": "‘", + "\u2019": "’", +} + + +def _encode_smart_quotes(text: str) -> str: + for char, entity in SMART_QUOTE_ENTITIES.items(): + text = text.replace(char, entity) + return text + + +def _append_xml(xml_path: Path, root_tag: str, content: str) -> None: + dom = defusedxml.minidom.parseString(xml_path.read_text(encoding="utf-8")) + root = dom.getElementsByTagName(root_tag)[0] + ns_attrs = " ".join(f'xmlns:{k}="{v}"' for k, v in NS.items()) + wrapper_dom = defusedxml.minidom.parseString(f"{content}") + for child in wrapper_dom.documentElement.childNodes: + if child.nodeType == child.ELEMENT_NODE: + root.appendChild(dom.importNode(child, True)) + output = _encode_smart_quotes(dom.toxml(encoding="UTF-8").decode("utf-8")) + xml_path.write_text(output, encoding="utf-8") + + +def _find_para_id(comments_path: Path, comment_id: int) -> str | None: + dom = defusedxml.minidom.parseString(comments_path.read_text(encoding="utf-8")) + for c in dom.getElementsByTagName("w:comment"): + if c.getAttribute("w:id") == str(comment_id): + for p in c.getElementsByTagName("w:p"): + if pid := p.getAttribute("w14:paraId"): + return pid + return None + + +def _get_next_rid(rels_path: Path) -> int: + dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8")) + max_rid = 0 + for rel in dom.getElementsByTagName("Relationship"): + rid = rel.getAttribute("Id") + if rid and rid.startswith("rId"): + try: + max_rid = max(max_rid, int(rid[3:])) + except ValueError: + pass + return max_rid + 1 + + +def _has_relationship(rels_path: Path, target: str) -> bool: + dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8")) + for rel in dom.getElementsByTagName("Relationship"): + if rel.getAttribute("Target") == target: + return True + return False + + +def _has_content_type(ct_path: Path, part_name: str) -> 
bool: + dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8")) + for override in dom.getElementsByTagName("Override"): + if override.getAttribute("PartName") == part_name: + return True + return False + + +def _ensure_comment_relationships(unpacked_dir: Path) -> None: + rels_path = unpacked_dir / "word" / "_rels" / "document.xml.rels" + if not rels_path.exists(): + return + + if _has_relationship(rels_path, "comments.xml"): + return + + dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8")) + root = dom.documentElement + next_rid = _get_next_rid(rels_path) + + rels = [ + ( + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments", + "comments.xml", + ), + ( + "http://schemas.microsoft.com/office/2011/relationships/commentsExtended", + "commentsExtended.xml", + ), + ( + "http://schemas.microsoft.com/office/2016/09/relationships/commentsIds", + "commentsIds.xml", + ), + ( + "http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible", + "commentsExtensible.xml", + ), + ] + + for rel_type, target in rels: + rel = dom.createElement("Relationship") + rel.setAttribute("Id", f"rId{next_rid}") + rel.setAttribute("Type", rel_type) + rel.setAttribute("Target", target) + root.appendChild(rel) + next_rid += 1 + + rels_path.write_bytes(dom.toxml(encoding="UTF-8")) + + +def _ensure_comment_content_types(unpacked_dir: Path) -> None: + ct_path = unpacked_dir / "[Content_Types].xml" + if not ct_path.exists(): + return + + if _has_content_type(ct_path, "/word/comments.xml"): + return + + dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8")) + root = dom.documentElement + + overrides = [ + ( + "/word/comments.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml", + ), + ( + "/word/commentsExtended.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml", + ), + ( + "/word/commentsIds.xml", + 
"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml", + ), + ( + "/word/commentsExtensible.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml", + ), + ] + + for part_name, content_type in overrides: + override = dom.createElement("Override") + override.setAttribute("PartName", part_name) + override.setAttribute("ContentType", content_type) + root.appendChild(override) + + ct_path.write_bytes(dom.toxml(encoding="UTF-8")) + + +def add_comment( + unpacked_dir: str, + comment_id: int, + text: str, + author: str = "Claude", + initials: str = "C", + parent_id: int | None = None, +) -> tuple[str, str]: + word = Path(unpacked_dir) / "word" + if not word.exists(): + return "", f"Error: {word} not found" + + para_id, durable_id = _generate_hex_id(), _generate_hex_id() + ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + comments = word / "comments.xml" + first_comment = not comments.exists() + if first_comment: + shutil.copy(TEMPLATE_DIR / "comments.xml", comments) + _ensure_comment_relationships(Path(unpacked_dir)) + _ensure_comment_content_types(Path(unpacked_dir)) + _append_xml( + comments, + "w:comments", + COMMENT_XML.format( + id=comment_id, + author=author, + date=ts, + initials=initials, + para_id=para_id, + text=text, + ), + ) + + ext = word / "commentsExtended.xml" + if not ext.exists(): + shutil.copy(TEMPLATE_DIR / "commentsExtended.xml", ext) + if parent_id is not None: + parent_para = _find_para_id(comments, parent_id) + if not parent_para: + return "", f"Error: Parent comment {parent_id} not found" + _append_xml( + ext, + "w15:commentsEx", + f'', + ) + else: + _append_xml( + ext, + "w15:commentsEx", + f'', + ) + + ids = word / "commentsIds.xml" + if not ids.exists(): + shutil.copy(TEMPLATE_DIR / "commentsIds.xml", ids) + _append_xml( + ids, + "w16cid:commentsIds", + f'', + ) + + extensible = word / "commentsExtensible.xml" + if not extensible.exists(): + 
shutil.copy(TEMPLATE_DIR / "commentsExtensible.xml", extensible) + _append_xml( + extensible, + "w16cex:commentsExtensible", + f'', + ) + + action = "reply" if parent_id is not None else "comment" + return para_id, f"Added {action} {comment_id} (para_id={para_id})" + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Add comments to DOCX documents") + p.add_argument("unpacked_dir", help="Unpacked DOCX directory") + p.add_argument("comment_id", type=int, help="Comment ID (must be unique)") + p.add_argument("text", help="Comment text") + p.add_argument("--author", default="Claude", help="Author name") + p.add_argument("--initials", default="C", help="Author initials") + p.add_argument("--parent", type=int, help="Parent comment ID (for replies)") + args = p.parse_args() + + para_id, msg = add_comment( + args.unpacked_dir, + args.comment_id, + args.text, + args.author, + args.initials, + args.parent, + ) + print(msg) + if "Error" in msg: + sys.exit(1) + cid = args.comment_id + if args.parent is not None: + print(REPLY_MARKER_TEMPLATE.format(pid=args.parent, cid=cid)) + else: + print(COMMENT_MARKER_TEMPLATE.format(cid=cid)) diff --git a/agentskills/docx/scripts/office/helpers/__init__.py b/agentskills/docx/scripts/office/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agentskills/docx/scripts/office/helpers/merge_runs.py b/agentskills/docx/scripts/office/helpers/merge_runs.py new file mode 100644 index 0000000..ad7c25e --- /dev/null +++ b/agentskills/docx/scripts/office/helpers/merge_runs.py @@ -0,0 +1,199 @@ +"""Merge adjacent runs with identical formatting in DOCX. + +Merges adjacent elements that have identical properties. +Works on runs in paragraphs and inside tracked changes (, ). 
+ +Also: +- Removes rsid attributes from runs (revision metadata that doesn't affect rendering) +- Removes proofErr elements (spell/grammar markers that block merging) +""" + +from pathlib import Path + +import defusedxml.minidom + + +def merge_runs(input_dir: str) -> tuple[int, str]: + doc_xml = Path(input_dir) / "word" / "document.xml" + + if not doc_xml.exists(): + return 0, f"Error: {doc_xml} not found" + + try: + dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8")) + root = dom.documentElement + + _remove_elements(root, "proofErr") + _strip_run_rsid_attrs(root) + + containers = {run.parentNode for run in _find_elements(root, "r")} + + merge_count = 0 + for container in containers: + merge_count += _merge_runs_in(container) + + doc_xml.write_bytes(dom.toxml(encoding="UTF-8")) + return merge_count, f"Merged {merge_count} runs" + + except Exception as e: + return 0, f"Error: {e}" + + + + +def _find_elements(root, tag: str) -> list: + results = [] + + def traverse(node): + if node.nodeType == node.ELEMENT_NODE: + name = node.localName or node.tagName + if name == tag or name.endswith(f":{tag}"): + results.append(node) + for child in node.childNodes: + traverse(child) + + traverse(root) + return results + + +def _get_child(parent, tag: str): + for child in parent.childNodes: + if child.nodeType == child.ELEMENT_NODE: + name = child.localName or child.tagName + if name == tag or name.endswith(f":{tag}"): + return child + return None + + +def _get_children(parent, tag: str) -> list: + results = [] + for child in parent.childNodes: + if child.nodeType == child.ELEMENT_NODE: + name = child.localName or child.tagName + if name == tag or name.endswith(f":{tag}"): + results.append(child) + return results + + +def _is_adjacent(elem1, elem2) -> bool: + node = elem1.nextSibling + while node: + if node == elem2: + return True + if node.nodeType == node.ELEMENT_NODE: + return False + if node.nodeType == node.TEXT_NODE and node.data.strip(): + return False 
+ node = node.nextSibling + return False + + + + +def _remove_elements(root, tag: str): + for elem in _find_elements(root, tag): + if elem.parentNode: + elem.parentNode.removeChild(elem) + + +def _strip_run_rsid_attrs(root): + for run in _find_elements(root, "r"): + for attr in list(run.attributes.values()): + if "rsid" in attr.name.lower(): + run.removeAttribute(attr.name) + + + + +def _merge_runs_in(container) -> int: + merge_count = 0 + run = _first_child_run(container) + + while run: + while True: + next_elem = _next_element_sibling(run) + if next_elem and _is_run(next_elem) and _can_merge(run, next_elem): + _merge_run_content(run, next_elem) + container.removeChild(next_elem) + merge_count += 1 + else: + break + + _consolidate_text(run) + run = _next_sibling_run(run) + + return merge_count + + +def _first_child_run(container): + for child in container.childNodes: + if child.nodeType == child.ELEMENT_NODE and _is_run(child): + return child + return None + + +def _next_element_sibling(node): + sibling = node.nextSibling + while sibling: + if sibling.nodeType == sibling.ELEMENT_NODE: + return sibling + sibling = sibling.nextSibling + return None + + +def _next_sibling_run(node): + sibling = node.nextSibling + while sibling: + if sibling.nodeType == sibling.ELEMENT_NODE: + if _is_run(sibling): + return sibling + sibling = sibling.nextSibling + return None + + +def _is_run(node) -> bool: + name = node.localName or node.tagName + return name == "r" or name.endswith(":r") + + +def _can_merge(run1, run2) -> bool: + rpr1 = _get_child(run1, "rPr") + rpr2 = _get_child(run2, "rPr") + + if (rpr1 is None) != (rpr2 is None): + return False + if rpr1 is None: + return True + return rpr1.toxml() == rpr2.toxml() + + +def _merge_run_content(target, source): + for child in list(source.childNodes): + if child.nodeType == child.ELEMENT_NODE: + name = child.localName or child.tagName + if name != "rPr" and not name.endswith(":rPr"): + target.appendChild(child) + + +def 
_consolidate_text(run): + t_elements = _get_children(run, "t") + + for i in range(len(t_elements) - 1, 0, -1): + curr, prev = t_elements[i], t_elements[i - 1] + + if _is_adjacent(prev, curr): + prev_text = prev.firstChild.data if prev.firstChild else "" + curr_text = curr.firstChild.data if curr.firstChild else "" + merged = prev_text + curr_text + + if prev.firstChild: + prev.firstChild.data = merged + else: + prev.appendChild(run.ownerDocument.createTextNode(merged)) + + if merged.startswith(" ") or merged.endswith(" "): + prev.setAttribute("xml:space", "preserve") + elif prev.hasAttribute("xml:space"): + prev.removeAttribute("xml:space") + + run.removeChild(curr) diff --git a/agentskills/docx/scripts/office/helpers/simplify_redlines.py b/agentskills/docx/scripts/office/helpers/simplify_redlines.py new file mode 100644 index 0000000..db963bb --- /dev/null +++ b/agentskills/docx/scripts/office/helpers/simplify_redlines.py @@ -0,0 +1,197 @@ +"""Simplify tracked changes by merging adjacent w:ins or w:del elements. + +Merges adjacent elements from the same author into a single element. +Same for elements. This makes heavily-redlined documents easier to +work with by reducing the number of tracked change wrappers. 
+ +Rules: +- Only merges w:ins with w:ins, w:del with w:del (same element type) +- Only merges if same author (ignores timestamp differences) +- Only merges if truly adjacent (only whitespace between them) +""" + +import xml.etree.ElementTree as ET +import zipfile +from pathlib import Path + +import defusedxml.minidom + +WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + + +def simplify_redlines(input_dir: str) -> tuple[int, str]: + doc_xml = Path(input_dir) / "word" / "document.xml" + + if not doc_xml.exists(): + return 0, f"Error: {doc_xml} not found" + + try: + dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8")) + root = dom.documentElement + + merge_count = 0 + + containers = _find_elements(root, "p") + _find_elements(root, "tc") + + for container in containers: + merge_count += _merge_tracked_changes_in(container, "ins") + merge_count += _merge_tracked_changes_in(container, "del") + + doc_xml.write_bytes(dom.toxml(encoding="UTF-8")) + return merge_count, f"Simplified {merge_count} tracked changes" + + except Exception as e: + return 0, f"Error: {e}" + + +def _merge_tracked_changes_in(container, tag: str) -> int: + merge_count = 0 + + tracked = [ + child + for child in container.childNodes + if child.nodeType == child.ELEMENT_NODE and _is_element(child, tag) + ] + + if len(tracked) < 2: + return 0 + + i = 0 + while i < len(tracked) - 1: + curr = tracked[i] + next_elem = tracked[i + 1] + + if _can_merge_tracked(curr, next_elem): + _merge_tracked_content(curr, next_elem) + container.removeChild(next_elem) + tracked.pop(i + 1) + merge_count += 1 + else: + i += 1 + + return merge_count + + +def _is_element(node, tag: str) -> bool: + name = node.localName or node.tagName + return name == tag or name.endswith(f":{tag}") + + +def _get_author(elem) -> str: + author = elem.getAttribute("w:author") + if not author: + for attr in elem.attributes.values(): + if attr.localName == "author" or attr.name.endswith(":author"): + 
return attr.value + return author + + +def _can_merge_tracked(elem1, elem2) -> bool: + if _get_author(elem1) != _get_author(elem2): + return False + + node = elem1.nextSibling + while node and node != elem2: + if node.nodeType == node.ELEMENT_NODE: + return False + if node.nodeType == node.TEXT_NODE and node.data.strip(): + return False + node = node.nextSibling + + return True + + +def _merge_tracked_content(target, source): + while source.firstChild: + child = source.firstChild + source.removeChild(child) + target.appendChild(child) + + +def _find_elements(root, tag: str) -> list: + results = [] + + def traverse(node): + if node.nodeType == node.ELEMENT_NODE: + name = node.localName or node.tagName + if name == tag or name.endswith(f":{tag}"): + results.append(node) + for child in node.childNodes: + traverse(child) + + traverse(root) + return results + + +def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]: + if not doc_xml_path.exists(): + return {} + + try: + tree = ET.parse(doc_xml_path) + root = tree.getroot() + except ET.ParseError: + return {} + + namespaces = {"w": WORD_NS} + author_attr = f"{{{WORD_NS}}}author" + + authors: dict[str, int] = {} + for tag in ["ins", "del"]: + for elem in root.findall(f".//w:{tag}", namespaces): + author = elem.get(author_attr) + if author: + authors[author] = authors.get(author, 0) + 1 + + return authors + + +def _get_authors_from_docx(docx_path: Path) -> dict[str, int]: + try: + with zipfile.ZipFile(docx_path, "r") as zf: + if "word/document.xml" not in zf.namelist(): + return {} + with zf.open("word/document.xml") as f: + tree = ET.parse(f) + root = tree.getroot() + + namespaces = {"w": WORD_NS} + author_attr = f"{{{WORD_NS}}}author" + + authors: dict[str, int] = {} + for tag in ["ins", "del"]: + for elem in root.findall(f".//w:{tag}", namespaces): + author = elem.get(author_attr) + if author: + authors[author] = authors.get(author, 0) + 1 + return authors + except (zipfile.BadZipFile, ET.ParseError): + 
return {} + + +def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str: + modified_xml = modified_dir / "word" / "document.xml" + modified_authors = get_tracked_change_authors(modified_xml) + + if not modified_authors: + return default + + original_authors = _get_authors_from_docx(original_docx) + + new_changes: dict[str, int] = {} + for author, count in modified_authors.items(): + original_count = original_authors.get(author, 0) + diff = count - original_count + if diff > 0: + new_changes[author] = diff + + if not new_changes: + return default + + if len(new_changes) == 1: + return next(iter(new_changes)) + + raise ValueError( + f"Multiple authors added new changes: {new_changes}. " + "Cannot infer which author to validate." + ) diff --git a/agentskills/docx/scripts/office/pack.py b/agentskills/docx/scripts/office/pack.py new file mode 100755 index 0000000..db29ed8 --- /dev/null +++ b/agentskills/docx/scripts/office/pack.py @@ -0,0 +1,159 @@ +"""Pack a directory into a DOCX, PPTX, or XLSX file. + +Validates with auto-repair, condenses XML formatting, and creates the Office file. 
+ +Usage: + python pack.py [--original ] [--validate true|false] + +Examples: + python pack.py unpacked/ output.docx --original input.docx + python pack.py unpacked/ output.pptx --validate false +""" + +import argparse +import sys +import shutil +import tempfile +import zipfile +from pathlib import Path + +import defusedxml.minidom + +from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator + +def pack( + input_directory: str, + output_file: str, + original_file: str | None = None, + validate: bool = True, + infer_author_func=None, +) -> tuple[None, str]: + input_dir = Path(input_directory) + output_path = Path(output_file) + suffix = output_path.suffix.lower() + + if not input_dir.is_dir(): + return None, f"Error: {input_dir} is not a directory" + + if suffix not in {".docx", ".pptx", ".xlsx"}: + return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file" + + if validate and original_file: + original_path = Path(original_file) + if original_path.exists(): + success, output = _run_validation( + input_dir, original_path, suffix, infer_author_func + ) + if output: + print(output) + if not success: + return None, f"Error: Validation failed for {input_dir}" + + with tempfile.TemporaryDirectory() as temp_dir: + temp_content_dir = Path(temp_dir) / "content" + shutil.copytree(input_dir, temp_content_dir) + + for pattern in ["*.xml", "*.rels"]: + for xml_file in temp_content_dir.rglob(pattern): + _condense_xml(xml_file) + + output_path.parent.mkdir(parents=True, exist_ok=True) + with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf: + for f in temp_content_dir.rglob("*"): + if f.is_file(): + zf.write(f, f.relative_to(temp_content_dir)) + + return None, f"Successfully packed {input_dir} to {output_file}" + + +def _run_validation( + unpacked_dir: Path, + original_file: Path, + suffix: str, + infer_author_func=None, +) -> tuple[bool, str | None]: + output_lines = [] + validators = [] + + if suffix == ".docx": + author = 
"Claude" + if infer_author_func: + try: + author = infer_author_func(unpacked_dir, original_file) + except ValueError as e: + print(f"Warning: {e} Using default author 'Claude'.", file=sys.stderr) + + validators = [ + DOCXSchemaValidator(unpacked_dir, original_file), + RedliningValidator(unpacked_dir, original_file, author=author), + ] + elif suffix == ".pptx": + validators = [PPTXSchemaValidator(unpacked_dir, original_file)] + + if not validators: + return True, None + + total_repairs = sum(v.repair() for v in validators) + if total_repairs: + output_lines.append(f"Auto-repaired {total_repairs} issue(s)") + + success = all(v.validate() for v in validators) + + if success: + output_lines.append("All validations PASSED!") + + return success, "\n".join(output_lines) if output_lines else None + + +def _condense_xml(xml_file: Path) -> None: + try: + with open(xml_file, encoding="utf-8") as f: + dom = defusedxml.minidom.parse(f) + + for element in dom.getElementsByTagName("*"): + if element.tagName.endswith(":t"): + continue + + for child in list(element.childNodes): + if ( + child.nodeType == child.TEXT_NODE + and child.nodeValue + and child.nodeValue.strip() == "" + ) or child.nodeType == child.COMMENT_NODE: + element.removeChild(child) + + xml_file.write_bytes(dom.toxml(encoding="UTF-8")) + except Exception as e: + print(f"ERROR: Failed to parse {xml_file.name}: {e}", file=sys.stderr) + raise + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Pack a directory into a DOCX, PPTX, or XLSX file" + ) + parser.add_argument("input_directory", help="Unpacked Office document directory") + parser.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)") + parser.add_argument( + "--original", + help="Original file for validation comparison", + ) + parser.add_argument( + "--validate", + type=lambda x: x.lower() == "true", + default=True, + metavar="true|false", + help="Run validation with auto-repair (default: true)", + ) + args = 
parser.parse_args() + + _, message = pack( + args.input_directory, + args.output_file, + original_file=args.original, + validate=args.validate, + ) + print(message) + + if "Error" in message: + sys.exit(1) diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd new file mode 100644 index 0000000..6454ef9 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd @@ -0,0 +1,1499 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd new file mode 100644 index 0000000..afa4f46 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd @@ -0,0 +1,146 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd new file mode 100644 index 0000000..64e66b8 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd @@ -0,0 +1,1085 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd new file mode 100644 index 0000000..687eea8 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd @@ -0,0 +1,11 @@ + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd new file mode 100644 index 0000000..6ac81b0 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd @@ -0,0 +1,3081 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd new file mode 100644 index 0000000..1dbf051 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd new file mode 100644 index 0000000..f1af17d --- /dev/null +++ 
b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd new file mode 100644 index 0000000..0a185ab --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd @@ -0,0 +1,287 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd new file mode 100644 index 0000000..14ef488 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd @@ -0,0 +1,1676 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd new file mode 100644 index 0000000..c20f3bf --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd new file mode 100644 index 0000000..ac60252 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd @@ -0,0 +1,144 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd new file mode 100644 index 0000000..424b8ba --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd new file mode 100644 index 0000000..2bddce2 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd new file mode 100644 index 0000000..8a8c18b --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd new file mode 100644 index 0000000..5c42706 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd new file mode 100644 index 0000000..853c341 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd new file mode 100644 index 0000000..da835ee --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd @@ -0,0 +1,195 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd new file mode 100644 index 0000000..87ad265 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd @@ -0,0 +1,582 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd new file mode 100644 index 0000000..9e86f1b --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd new file mode 100644 index 0000000..d0be42e --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd @@ -0,0 +1,4439 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd new file mode 100644 index 0000000..8821dd1 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd @@ -0,0 +1,570 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd new file mode 100644 index 0000000..ca2575c --- /dev/null +++ 
b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd @@ -0,0 +1,509 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd new file mode 100644 index 0000000..dd079e6 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd @@ -0,0 +1,12 @@ + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd new file mode 100644 index 0000000..3dd6cf6 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd new file mode 100644 index 0000000..f1041e3 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd new file mode 100644 index 0000000..9c5b7a6 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd @@ -0,0 +1,3646 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd new file mode 100644 index 0000000..0f13678 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd @@ -0,0 +1,116 @@ + + + + + + See http://www.w3.org/XML/1998/namespace.html and + http://www.w3.org/TR/REC-xml for information about this namespace. + + This schema document describes the XML namespace, in a form + suitable for import by other schema documents. + + Note that local names in this namespace are intended to be defined + only by the World Wide Web Consortium or its subgroups. The + following names are currently defined in this namespace and should + not be used with conflicting semantics by any Working Group, + specification, or document instance: + + base (as an attribute name): denotes an attribute whose value + provides a URI to be used as the base for interpreting any + relative URIs in the scope of the element on which it + appears; its value is inherited. This name is reserved + by virtue of its definition in the XML Base specification. + + lang (as an attribute name): denotes an attribute whose value + is a language code for the natural language of the content of + any element; its value is inherited. This name is reserved + by virtue of its definition in the XML specification. + + space (as an attribute name): denotes an attribute whose + value is a keyword indicating what whitespace processing + discipline is intended for the content of the element; its + value is inherited. This name is reserved by virtue of its + definition in the XML specification. + + Father (in any context at all): denotes Jon Bosak, the chair of + the original XML Working Group. 
This name is reserved by + the following decision of the W3C XML Plenary and + XML Coordination groups: + + In appreciation for his vision, leadership and dedication + the W3C XML Plenary on this 10th day of February, 2000 + reserves for Jon Bosak in perpetuity the XML name + xml:Father + + + + + This schema defines attributes and an attribute group + suitable for use by + schemas wishing to allow xml:base, xml:lang or xml:space attributes + on elements they define. + + To enable this, such a schema must import this schema + for the XML namespace, e.g. as follows: + <schema . . .> + . . . + <import namespace="http://www.w3.org/XML/1998/namespace" + schemaLocation="http://www.w3.org/2001/03/xml.xsd"/> + + Subsequently, qualified reference to any of the attributes + or the group defined below will have the desired effect, e.g. + + <type . . .> + . . . + <attributeGroup ref="xml:specialAttrs"/> + + will define a type which will schema-validate an instance + element with any of those attributes + + + + In keeping with the XML Schema WG's standard versioning + policy, this schema document will persist at + http://www.w3.org/2001/03/xml.xsd. + At the date of issue it can also be found at + http://www.w3.org/2001/xml.xsd. + The schema document at that URI may however change in the future, + in order to remain compatible with the latest version of XML Schema + itself. In other words, if the XML Schema namespace changes, the version + of this document at + http://www.w3.org/2001/xml.xsd will change + accordingly; the version at + http://www.w3.org/2001/03/xml.xsd will not change. + + + + + + In due course, we should install the relevant ISO 2- and 3-letter + codes as the enumerated possible values . . . + + + + + + + + + + + + + + + See http://www.w3.org/TR/xmlbase/ for + information about this attribute. 
+ + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd new file mode 100644 index 0000000..a6de9d2 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd new file mode 100644 index 0000000..10e978b --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd new file mode 100644 index 0000000..4248bf7 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd new file mode 100644 index 0000000..5649746 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/mce/mc.xsd b/agentskills/docx/scripts/office/schemas/mce/mc.xsd new file mode 100644 index 0000000..ef72545 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/mce/mc.xsd @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + diff --git a/agentskills/docx/scripts/office/schemas/microsoft/wml-2010.xsd b/agentskills/docx/scripts/office/schemas/microsoft/wml-2010.xsd new file mode 100644 index 0000000..f65f777 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/microsoft/wml-2010.xsd @@ -0,0 +1,560 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/microsoft/wml-2012.xsd b/agentskills/docx/scripts/office/schemas/microsoft/wml-2012.xsd new file mode 100644 index 0000000..6b00755 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/microsoft/wml-2012.xsd @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/microsoft/wml-2018.xsd b/agentskills/docx/scripts/office/schemas/microsoft/wml-2018.xsd new file mode 100644 index 
0000000..f321d33 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/microsoft/wml-2018.xsd @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd b/agentskills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd new file mode 100644 index 0000000..364c6a9 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd b/agentskills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd new file mode 100644 index 0000000..fed9d15 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/agentskills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd b/agentskills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd new file mode 100644 index 0000000..680cf15 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd @@ -0,0 +1,4 @@ + + + + diff --git a/agentskills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd b/agentskills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd new file mode 100644 index 0000000..89ada90 --- /dev/null +++ b/agentskills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/agentskills/docx/scripts/office/soffice.py b/agentskills/docx/scripts/office/soffice.py new file mode 100644 index 0000000..c7f7e32 --- /dev/null +++ b/agentskills/docx/scripts/office/soffice.py @@ -0,0 +1,183 @@ +""" +Helper for running LibreOffice (soffice) in environments where AF_UNIX +sockets may be blocked (e.g., sandboxed VMs). Detects the restriction +at runtime and applies an LD_PRELOAD shim if needed. 
+ +Usage: + from office.soffice import run_soffice, get_soffice_env + + # Option 1 – run soffice directly + result = run_soffice(["--headless", "--convert-to", "pdf", "input.docx"]) + + # Option 2 – get env dict for your own subprocess calls + env = get_soffice_env() + subprocess.run(["soffice", ...], env=env) +""" + +import os +import socket +import subprocess +import tempfile +from pathlib import Path + + +def get_soffice_env() -> dict: + env = os.environ.copy() + env["SAL_USE_VCLPLUGIN"] = "svp" + + if _needs_shim(): + shim = _ensure_shim() + env["LD_PRELOAD"] = str(shim) + + return env + + +def run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess: + env = get_soffice_env() + return subprocess.run(["soffice"] + args, env=env, **kwargs) + + + +_SHIM_SO = Path(tempfile.gettempdir()) / "lo_socket_shim.so" + + +def _needs_shim() -> bool: + try: + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.close() + return False + except OSError: + return True + + +def _ensure_shim() -> Path: + if _SHIM_SO.exists(): + return _SHIM_SO + + src = Path(tempfile.gettempdir()) / "lo_socket_shim.c" + src.write_text(_SHIM_SOURCE) + subprocess.run( + ["gcc", "-shared", "-fPIC", "-o", str(_SHIM_SO), str(src), "-ldl"], + check=True, + capture_output=True, + ) + src.unlink() + return _SHIM_SO + + + +_SHIM_SOURCE = r""" +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +static int (*real_socket)(int, int, int); +static int (*real_socketpair)(int, int, int, int[2]); +static int (*real_listen)(int, int); +static int (*real_accept)(int, struct sockaddr *, socklen_t *); +static int (*real_close)(int); +static int (*real_read)(int, void *, size_t); + +/* Per-FD bookkeeping (FDs >= 1024 are passed through unshimmed). 
*/ +static int is_shimmed[1024]; +static int peer_of[1024]; +static int wake_r[1024]; /* accept() blocks reading this */ +static int wake_w[1024]; /* close() writes to this */ +static int listener_fd = -1; /* FD that received listen() */ + +__attribute__((constructor)) +static void init(void) { + real_socket = dlsym(RTLD_NEXT, "socket"); + real_socketpair = dlsym(RTLD_NEXT, "socketpair"); + real_listen = dlsym(RTLD_NEXT, "listen"); + real_accept = dlsym(RTLD_NEXT, "accept"); + real_close = dlsym(RTLD_NEXT, "close"); + real_read = dlsym(RTLD_NEXT, "read"); + for (int i = 0; i < 1024; i++) { + peer_of[i] = -1; + wake_r[i] = -1; + wake_w[i] = -1; + } +} + +/* ---- socket ---------------------------------------------------------- */ +int socket(int domain, int type, int protocol) { + if (domain == AF_UNIX) { + int fd = real_socket(domain, type, protocol); + if (fd >= 0) return fd; + /* socket(AF_UNIX) blocked – fall back to socketpair(). */ + int sv[2]; + if (real_socketpair(domain, type, protocol, sv) == 0) { + if (sv[0] >= 0 && sv[0] < 1024) { + is_shimmed[sv[0]] = 1; + peer_of[sv[0]] = sv[1]; + int wp[2]; + if (pipe(wp) == 0) { + wake_r[sv[0]] = wp[0]; + wake_w[sv[0]] = wp[1]; + } + } + return sv[0]; + } + errno = EPERM; + return -1; + } + return real_socket(domain, type, protocol); +} + +/* ---- listen ---------------------------------------------------------- */ +int listen(int sockfd, int backlog) { + if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) { + listener_fd = sockfd; + return 0; + } + return real_listen(sockfd, backlog); +} + +/* ---- accept ---------------------------------------------------------- */ +int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) { + if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) { + /* Block until close() writes to the wake pipe. 
*/ + if (wake_r[sockfd] >= 0) { + char buf; + real_read(wake_r[sockfd], &buf, 1); + } + errno = ECONNABORTED; + return -1; + } + return real_accept(sockfd, addr, addrlen); +} + +/* ---- close ----------------------------------------------------------- */ +int close(int fd) { + if (fd >= 0 && fd < 1024 && is_shimmed[fd]) { + int was_listener = (fd == listener_fd); + is_shimmed[fd] = 0; + + if (wake_w[fd] >= 0) { /* unblock accept() */ + char c = 0; + write(wake_w[fd], &c, 1); + real_close(wake_w[fd]); + wake_w[fd] = -1; + } + if (wake_r[fd] >= 0) { real_close(wake_r[fd]); wake_r[fd] = -1; } + if (peer_of[fd] >= 0) { real_close(peer_of[fd]); peer_of[fd] = -1; } + + if (was_listener) + _exit(0); /* conversion done – exit */ + } + return real_close(fd); +} +""" + + + +if __name__ == "__main__": + import sys + result = run_soffice(sys.argv[1:]) + sys.exit(result.returncode) diff --git a/agentskills/docx/scripts/office/unpack.py b/agentskills/docx/scripts/office/unpack.py new file mode 100755 index 0000000..0015253 --- /dev/null +++ b/agentskills/docx/scripts/office/unpack.py @@ -0,0 +1,132 @@ +"""Unpack Office files (DOCX, PPTX, XLSX) for editing. 
+ +Extracts the ZIP archive, pretty-prints XML files, and optionally: +- Merges adjacent runs with identical formatting (DOCX only) +- Simplifies adjacent tracked changes from same author (DOCX only) + +Usage: + python unpack.py [options] + +Examples: + python unpack.py document.docx unpacked/ + python unpack.py presentation.pptx unpacked/ + python unpack.py document.docx unpacked/ --merge-runs false +""" + +import argparse +import sys +import zipfile +from pathlib import Path + +import defusedxml.minidom + +from helpers.merge_runs import merge_runs as do_merge_runs +from helpers.simplify_redlines import simplify_redlines as do_simplify_redlines + +SMART_QUOTE_REPLACEMENTS = { + "\u201c": "“", + "\u201d": "”", + "\u2018": "‘", + "\u2019": "’", +} + + +def unpack( + input_file: str, + output_directory: str, + merge_runs: bool = True, + simplify_redlines: bool = True, +) -> tuple[None, str]: + input_path = Path(input_file) + output_path = Path(output_directory) + suffix = input_path.suffix.lower() + + if not input_path.exists(): + return None, f"Error: {input_file} does not exist" + + if suffix not in {".docx", ".pptx", ".xlsx"}: + return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file" + + try: + output_path.mkdir(parents=True, exist_ok=True) + + with zipfile.ZipFile(input_path, "r") as zf: + zf.extractall(output_path) + + xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels")) + for xml_file in xml_files: + _pretty_print_xml(xml_file) + + message = f"Unpacked {input_file} ({len(xml_files)} XML files)" + + if suffix == ".docx": + if simplify_redlines: + simplify_count, _ = do_simplify_redlines(str(output_path)) + message += f", simplified {simplify_count} tracked changes" + + if merge_runs: + merge_count, _ = do_merge_runs(str(output_path)) + message += f", merged {merge_count} runs" + + for xml_file in xml_files: + _escape_smart_quotes(xml_file) + + return None, message + + except zipfile.BadZipFile: + return None, 
f"Error: {input_file} is not a valid Office file" + except Exception as e: + return None, f"Error unpacking: {e}" + + +def _pretty_print_xml(xml_file: Path) -> None: + try: + content = xml_file.read_text(encoding="utf-8") + dom = defusedxml.minidom.parseString(content) + xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="utf-8")) + except Exception: + pass + + +def _escape_smart_quotes(xml_file: Path) -> None: + try: + content = xml_file.read_text(encoding="utf-8") + for char, entity in SMART_QUOTE_REPLACEMENTS.items(): + content = content.replace(char, entity) + xml_file.write_text(content, encoding="utf-8") + except Exception: + pass + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Unpack an Office file (DOCX, PPTX, XLSX) for editing" + ) + parser.add_argument("input_file", help="Office file to unpack") + parser.add_argument("output_directory", help="Output directory") + parser.add_argument( + "--merge-runs", + type=lambda x: x.lower() == "true", + default=True, + metavar="true|false", + help="Merge adjacent runs with identical formatting (DOCX only, default: true)", + ) + parser.add_argument( + "--simplify-redlines", + type=lambda x: x.lower() == "true", + default=True, + metavar="true|false", + help="Merge adjacent tracked changes from same author (DOCX only, default: true)", + ) + args = parser.parse_args() + + _, message = unpack( + args.input_file, + args.output_directory, + merge_runs=args.merge_runs, + simplify_redlines=args.simplify_redlines, + ) + print(message) + + if "Error" in message: + sys.exit(1) diff --git a/agentskills/docx/scripts/office/validate.py b/agentskills/docx/scripts/office/validate.py new file mode 100755 index 0000000..03b01f6 --- /dev/null +++ b/agentskills/docx/scripts/office/validate.py @@ -0,0 +1,111 @@ +""" +Command line tool to validate Office document XML files against XSD schemas and tracked changes. 
+ +Usage: + python validate.py [--original ] [--auto-repair] [--author NAME] + +The first argument can be either: +- An unpacked directory containing the Office document XML files +- A packed Office file (.docx/.pptx/.xlsx) which will be unpacked to a temp directory + +Auto-repair fixes: +- paraId/durableId values that exceed OOXML limits +- Missing xml:space="preserve" on w:t elements with whitespace +""" + +import argparse +import sys +import tempfile +import zipfile +from pathlib import Path + +from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator + + +def main(): + parser = argparse.ArgumentParser(description="Validate Office document XML files") + parser.add_argument( + "path", + help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)", + ) + parser.add_argument( + "--original", + required=False, + default=None, + help="Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors are reported and redlining validation is skipped.", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Enable verbose output", + ) + parser.add_argument( + "--auto-repair", + action="store_true", + help="Automatically repair common issues (hex IDs, whitespace preservation)", + ) + parser.add_argument( + "--author", + default="Claude", + help="Author name for redlining validation (default: Claude)", + ) + args = parser.parse_args() + + path = Path(args.path) + assert path.exists(), f"Error: {path} does not exist" + + original_file = None + if args.original: + original_file = Path(args.original) + assert original_file.is_file(), f"Error: {original_file} is not a file" + assert original_file.suffix.lower() in [".docx", ".pptx", ".xlsx"], ( + f"Error: {original_file} must be a .docx, .pptx, or .xlsx file" + ) + + file_extension = (original_file or path).suffix.lower() + assert file_extension in [".docx", ".pptx", ".xlsx"], ( + f"Error: Cannot determine file type from {path}. 
Use --original or provide a .docx/.pptx/.xlsx file." + ) + + if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]: + temp_dir = tempfile.mkdtemp() + with zipfile.ZipFile(path, "r") as zf: + zf.extractall(temp_dir) + unpacked_dir = Path(temp_dir) + else: + assert path.is_dir(), f"Error: {path} is not a directory or Office file" + unpacked_dir = path + + match file_extension: + case ".docx": + validators = [ + DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose), + ] + if original_file: + validators.append( + RedliningValidator(unpacked_dir, original_file, verbose=args.verbose, author=args.author) + ) + case ".pptx": + validators = [ + PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose), + ] + case _: + print(f"Error: Validation not supported for file type {file_extension}") + sys.exit(1) + + if args.auto_repair: + total_repairs = sum(v.repair() for v in validators) + if total_repairs: + print(f"Auto-repaired {total_repairs} issue(s)") + + success = all(v.validate() for v in validators) + + if success: + print("All validations PASSED!") + + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/agentskills/docx/scripts/office/validators/__init__.py b/agentskills/docx/scripts/office/validators/__init__.py new file mode 100644 index 0000000..db092ec --- /dev/null +++ b/agentskills/docx/scripts/office/validators/__init__.py @@ -0,0 +1,15 @@ +""" +Validation modules for Word document processing. 
+""" + +from .base import BaseSchemaValidator +from .docx import DOCXSchemaValidator +from .pptx import PPTXSchemaValidator +from .redlining import RedliningValidator + +__all__ = [ + "BaseSchemaValidator", + "DOCXSchemaValidator", + "PPTXSchemaValidator", + "RedliningValidator", +] diff --git a/agentskills/docx/scripts/office/validators/base.py b/agentskills/docx/scripts/office/validators/base.py new file mode 100644 index 0000000..db4a06a --- /dev/null +++ b/agentskills/docx/scripts/office/validators/base.py @@ -0,0 +1,847 @@ +""" +Base validator with common validation logic for document files. +""" + +import re +from pathlib import Path + +import defusedxml.minidom +import lxml.etree + + +class BaseSchemaValidator: + + IGNORED_VALIDATION_ERRORS = [ + "hyphenationZone", + "purl.org/dc/terms", + ] + + UNIQUE_ID_REQUIREMENTS = { + "comment": ("id", "file"), + "commentrangestart": ("id", "file"), + "commentrangeend": ("id", "file"), + "bookmarkstart": ("id", "file"), + "bookmarkend": ("id", "file"), + "sldid": ("id", "file"), + "sldmasterid": ("id", "global"), + "sldlayoutid": ("id", "global"), + "cm": ("authorid", "file"), + "sheet": ("sheetid", "file"), + "definedname": ("id", "file"), + "cxnsp": ("id", "file"), + "sp": ("id", "file"), + "pic": ("id", "file"), + "grpsp": ("id", "file"), + } + + EXCLUDED_ID_CONTAINERS = { + "sectionlst", + } + + ELEMENT_RELATIONSHIP_TYPES = {} + + SCHEMA_MAPPINGS = { + "word": "ISO-IEC29500-4_2016/wml.xsd", + "ppt": "ISO-IEC29500-4_2016/pml.xsd", + "xl": "ISO-IEC29500-4_2016/sml.xsd", + "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd", + "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd", + "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd", + "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd", + ".rels": "ecma/fouth-edition/opc-relationships.xsd", + "people.xml": "microsoft/wml-2012.xsd", + "commentsIds.xml": "microsoft/wml-cid-2016.xsd", + "commentsExtensible.xml": 
"microsoft/wml-cex-2018.xsd", + "commentsExtended.xml": "microsoft/wml-2012.xsd", + "chart": "ISO-IEC29500-4_2016/dml-chart.xsd", + "theme": "ISO-IEC29500-4_2016/dml-main.xsd", + "drawing": "ISO-IEC29500-4_2016/dml-main.xsd", + } + + MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006" + XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" + + PACKAGE_RELATIONSHIPS_NAMESPACE = ( + "http://schemas.openxmlformats.org/package/2006/relationships" + ) + OFFICE_RELATIONSHIPS_NAMESPACE = ( + "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + ) + CONTENT_TYPES_NAMESPACE = ( + "http://schemas.openxmlformats.org/package/2006/content-types" + ) + + MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"} + + OOXML_NAMESPACES = { + "http://schemas.openxmlformats.org/officeDocument/2006/math", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships", + "http://schemas.openxmlformats.org/schemaLibrary/2006/main", + "http://schemas.openxmlformats.org/drawingml/2006/main", + "http://schemas.openxmlformats.org/drawingml/2006/chart", + "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing", + "http://schemas.openxmlformats.org/drawingml/2006/diagram", + "http://schemas.openxmlformats.org/drawingml/2006/picture", + "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing", + "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", + "http://schemas.openxmlformats.org/wordprocessingml/2006/main", + "http://schemas.openxmlformats.org/presentationml/2006/main", + "http://schemas.openxmlformats.org/spreadsheetml/2006/main", + "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes", + "http://www.w3.org/XML/1998/namespace", + } + + def __init__(self, unpacked_dir, original_file=None, verbose=False): + self.unpacked_dir = Path(unpacked_dir).resolve() + self.original_file = Path(original_file) if original_file else None + self.verbose = verbose + + self.schemas_dir = 
Path(__file__).parent.parent / "schemas" + + patterns = ["*.xml", "*.rels"] + self.xml_files = [ + f for pattern in patterns for f in self.unpacked_dir.rglob(pattern) + ] + + if not self.xml_files: + print(f"Warning: No XML files found in {self.unpacked_dir}") + + def validate(self): + raise NotImplementedError("Subclasses must implement the validate method") + + def repair(self) -> int: + return self.repair_whitespace_preservation() + + def repair_whitespace_preservation(self) -> int: + repairs = 0 + + for xml_file in self.xml_files: + try: + content = xml_file.read_text(encoding="utf-8") + dom = defusedxml.minidom.parseString(content) + modified = False + + for elem in dom.getElementsByTagName("*"): + if elem.tagName.endswith(":t") and elem.firstChild: + text = elem.firstChild.nodeValue + if text and (text.startswith((' ', '\t')) or text.endswith((' ', '\t'))): + if elem.getAttribute("xml:space") != "preserve": + elem.setAttribute("xml:space", "preserve") + text_preview = repr(text[:30]) + "..." 
if len(text) > 30 else repr(text) + print(f" Repaired: {xml_file.name}: Added xml:space='preserve' to {elem.tagName}: {text_preview}") + repairs += 1 + modified = True + + if modified: + xml_file.write_bytes(dom.toxml(encoding="UTF-8")) + + except Exception: + pass + + return repairs + + def validate_xml(self): + errors = [] + + for xml_file in self.xml_files: + try: + lxml.etree.parse(str(xml_file)) + except lxml.etree.XMLSyntaxError as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {e.lineno}: {e.msg}" + ) + except Exception as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Unexpected error: {str(e)}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} XML violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All XML files are well-formed") + return True + + def validate_namespaces(self): + errors = [] + + for xml_file in self.xml_files: + try: + root = lxml.etree.parse(str(xml_file)).getroot() + declared = set(root.nsmap.keys()) - {None} + + for attr_val in [ + v for k, v in root.attrib.items() if k.endswith("Ignorable") + ]: + undeclared = set(attr_val.split()) - declared + errors.extend( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Namespace '{ns}' in Ignorable but not declared" + for ns in undeclared + ) + except lxml.etree.XMLSyntaxError: + continue + + if errors: + print(f"FAILED - {len(errors)} namespace issues:") + for error in errors: + print(error) + return False + if self.verbose: + print("PASSED - All namespace prefixes properly declared") + return True + + def validate_unique_ids(self): + errors = [] + global_ids = {} + + for xml_file in self.xml_files: + try: + root = lxml.etree.parse(str(xml_file)).getroot() + file_ids = {} + + mc_elements = root.xpath( + ".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE} + ) + for elem in mc_elements: + elem.getparent().remove(elem) + + for elem in root.iter(): + tag = ( + 
elem.tag.split("}")[-1].lower() + if "}" in elem.tag + else elem.tag.lower() + ) + + if tag in self.UNIQUE_ID_REQUIREMENTS: + in_excluded_container = any( + ancestor.tag.split("}")[-1].lower() in self.EXCLUDED_ID_CONTAINERS + for ancestor in elem.iterancestors() + ) + if in_excluded_container: + continue + + attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag] + + id_value = None + for attr, value in elem.attrib.items(): + attr_local = ( + attr.split("}")[-1].lower() + if "}" in attr + else attr.lower() + ) + if attr_local == attr_name: + id_value = value + break + + if id_value is not None: + if scope == "global": + if id_value in global_ids: + prev_file, prev_line, prev_tag = global_ids[ + id_value + ] + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> " + f"already used in {prev_file} at line {prev_line} in <{prev_tag}>" + ) + else: + global_ids[id_value] = ( + xml_file.relative_to(self.unpacked_dir), + elem.sourceline, + tag, + ) + elif scope == "file": + key = (tag, attr_name) + if key not in file_ids: + file_ids[key] = {} + + if id_value in file_ids[key]: + prev_line = file_ids[key][id_value] + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> " + f"(first occurrence at line {prev_line})" + ) + else: + file_ids[key][id_value] = elem.sourceline + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} ID uniqueness violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All required IDs are unique") + return True + + def validate_file_references(self): + errors = [] + + rels_files = list(self.unpacked_dir.rglob("*.rels")) + + if not rels_files: + if self.verbose: + print("PASSED - No .rels files found") + return 
True + + all_files = [] + for file_path in self.unpacked_dir.rglob("*"): + if ( + file_path.is_file() + and file_path.name != "[Content_Types].xml" + and not file_path.name.endswith(".rels") + ): + all_files.append(file_path.resolve()) + + all_referenced_files = set() + + if self.verbose: + print( + f"Found {len(rels_files)} .rels files and {len(all_files)} target files" + ) + + for rels_file in rels_files: + try: + rels_root = lxml.etree.parse(str(rels_file)).getroot() + + rels_dir = rels_file.parent + + referenced_files = set() + broken_refs = [] + + for rel in rels_root.findall( + ".//ns:Relationship", + namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE}, + ): + target = rel.get("Target") + if target and not target.startswith( + ("http", "mailto:") + ): + if target.startswith("/"): + target_path = self.unpacked_dir / target.lstrip("/") + elif rels_file.name == ".rels": + target_path = self.unpacked_dir / target + else: + base_dir = rels_dir.parent + target_path = base_dir / target + + try: + target_path = target_path.resolve() + if target_path.exists() and target_path.is_file(): + referenced_files.add(target_path) + all_referenced_files.add(target_path) + else: + broken_refs.append((target, rel.sourceline)) + except (OSError, ValueError): + broken_refs.append((target, rel.sourceline)) + + if broken_refs: + rel_path = rels_file.relative_to(self.unpacked_dir) + for broken_ref, line_num in broken_refs: + errors.append( + f" {rel_path}: Line {line_num}: Broken reference to {broken_ref}" + ) + + except Exception as e: + rel_path = rels_file.relative_to(self.unpacked_dir) + errors.append(f" Error parsing {rel_path}: {e}") + + unreferenced_files = set(all_files) - all_referenced_files + + if unreferenced_files: + for unref_file in sorted(unreferenced_files): + unref_rel_path = unref_file.relative_to(self.unpacked_dir) + errors.append(f" Unreferenced file: {unref_rel_path}") + + if errors: + print(f"FAILED - Found {len(errors)} relationship validation errors:") + 
for error in errors: + print(error) + print( + "CRITICAL: These errors will cause the document to appear corrupt. " + + "Broken references MUST be fixed, " + + "and unreferenced files MUST be referenced or removed." + ) + return False + else: + if self.verbose: + print( + "PASSED - All references are valid and all files are properly referenced" + ) + return True + + def validate_all_relationship_ids(self): + import lxml.etree + + errors = [] + + for xml_file in self.xml_files: + if xml_file.suffix == ".rels": + continue + + rels_dir = xml_file.parent / "_rels" + rels_file = rels_dir / f"{xml_file.name}.rels" + + if not rels_file.exists(): + continue + + try: + rels_root = lxml.etree.parse(str(rels_file)).getroot() + rid_to_type = {} + + for rel in rels_root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ): + rid = rel.get("Id") + rel_type = rel.get("Type", "") + if rid: + if rid in rid_to_type: + rels_rel_path = rels_file.relative_to(self.unpacked_dir) + errors.append( + f" {rels_rel_path}: Line {rel.sourceline}: " + f"Duplicate relationship ID '{rid}' (IDs must be unique)" + ) + type_name = ( + rel_type.split("/")[-1] if "/" in rel_type else rel_type + ) + rid_to_type[rid] = type_name + + xml_root = lxml.etree.parse(str(xml_file)).getroot() + + r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE + rid_attrs_to_check = ["id", "embed", "link"] + for elem in xml_root.iter(): + for attr_name in rid_attrs_to_check: + rid_attr = elem.get(f"{{{r_ns}}}{attr_name}") + if not rid_attr: + continue + xml_rel_path = xml_file.relative_to(self.unpacked_dir) + elem_name = ( + elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag + ) + + if rid_attr not in rid_to_type: + errors.append( + f" {xml_rel_path}: Line {elem.sourceline}: " + f"<{elem_name}> r:{attr_name} references non-existent relationship '{rid_attr}' " + f"(valid IDs: {', '.join(sorted(rid_to_type.keys())[:5])}{'...' 
if len(rid_to_type) > 5 else ''})" + ) + elif attr_name == "id" and self.ELEMENT_RELATIONSHIP_TYPES: + expected_type = self._get_expected_relationship_type( + elem_name + ) + if expected_type: + actual_type = rid_to_type[rid_attr] + if expected_type not in actual_type.lower(): + errors.append( + f" {xml_rel_path}: Line {elem.sourceline}: " + f"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' " + f"but should point to a '{expected_type}' relationship" + ) + + except Exception as e: + xml_rel_path = xml_file.relative_to(self.unpacked_dir) + errors.append(f" Error processing {xml_rel_path}: {e}") + + if errors: + print(f"FAILED - Found {len(errors)} relationship ID reference errors:") + for error in errors: + print(error) + print("\nThese ID mismatches will cause the document to appear corrupt!") + return False + else: + if self.verbose: + print("PASSED - All relationship ID references are valid") + return True + + def _get_expected_relationship_type(self, element_name): + elem_lower = element_name.lower() + + if elem_lower in self.ELEMENT_RELATIONSHIP_TYPES: + return self.ELEMENT_RELATIONSHIP_TYPES[elem_lower] + + if elem_lower.endswith("id") and len(elem_lower) > 2: + prefix = elem_lower[:-2] + if prefix.endswith("master"): + return prefix.lower() + elif prefix.endswith("layout"): + return prefix.lower() + else: + if prefix == "sld": + return "slide" + return prefix.lower() + + if elem_lower.endswith("reference") and len(elem_lower) > 9: + prefix = elem_lower[:-9] + return prefix.lower() + + return None + + def validate_content_types(self): + errors = [] + + content_types_file = self.unpacked_dir / "[Content_Types].xml" + if not content_types_file.exists(): + print("FAILED - [Content_Types].xml file not found") + return False + + try: + root = lxml.etree.parse(str(content_types_file)).getroot() + declared_parts = set() + declared_extensions = set() + + for override in root.findall( + f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Override" + ): + 
part_name = override.get("PartName") + if part_name is not None: + declared_parts.add(part_name.lstrip("/")) + + for default in root.findall( + f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Default" + ): + extension = default.get("Extension") + if extension is not None: + declared_extensions.add(extension.lower()) + + declarable_roots = { + "sld", + "sldLayout", + "sldMaster", + "presentation", + "document", + "workbook", + "worksheet", + "theme", + } + + media_extensions = { + "png": "image/png", + "jpg": "image/jpeg", + "jpeg": "image/jpeg", + "gif": "image/gif", + "bmp": "image/bmp", + "tiff": "image/tiff", + "wmf": "image/x-wmf", + "emf": "image/x-emf", + } + + all_files = list(self.unpacked_dir.rglob("*")) + all_files = [f for f in all_files if f.is_file()] + + for xml_file in self.xml_files: + path_str = str(xml_file.relative_to(self.unpacked_dir)).replace( + "\\", "/" + ) + + if any( + skip in path_str + for skip in [".rels", "[Content_Types]", "docProps/", "_rels/"] + ): + continue + + try: + root_tag = lxml.etree.parse(str(xml_file)).getroot().tag + root_name = root_tag.split("}")[-1] if "}" in root_tag else root_tag + + if root_name in declarable_roots and path_str not in declared_parts: + errors.append( + f" {path_str}: File with <{root_name}> root not declared in [Content_Types].xml" + ) + + except Exception: + continue + + for file_path in all_files: + if file_path.suffix.lower() in {".xml", ".rels"}: + continue + if file_path.name == "[Content_Types].xml": + continue + if "_rels" in file_path.parts or "docProps" in file_path.parts: + continue + + extension = file_path.suffix.lstrip(".").lower() + if extension and extension not in declared_extensions: + if extension in media_extensions: + relative_path = file_path.relative_to(self.unpacked_dir) + errors.append( + f' {relative_path}: File with extension \'{extension}\' not declared in [Content_Types].xml - should add: ' + ) + + except Exception as e: + errors.append(f" Error parsing [Content_Types].xml: {e}") 
+ + if errors: + print(f"FAILED - Found {len(errors)} content type declaration errors:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print( + "PASSED - All content files are properly declared in [Content_Types].xml" + ) + return True + + def validate_file_against_xsd(self, xml_file, verbose=False): + xml_file = Path(xml_file).resolve() + unpacked_dir = self.unpacked_dir.resolve() + + is_valid, current_errors = self._validate_single_file_xsd( + xml_file, unpacked_dir + ) + + if is_valid is None: + return None, set() + elif is_valid: + return True, set() + + original_errors = self._get_original_file_errors(xml_file) + + assert current_errors is not None + new_errors = current_errors - original_errors + + new_errors = { + e for e in new_errors + if not any(pattern in e for pattern in self.IGNORED_VALIDATION_ERRORS) + } + + if new_errors: + if verbose: + relative_path = xml_file.relative_to(unpacked_dir) + print(f"FAILED - {relative_path}: {len(new_errors)} new error(s)") + for error in list(new_errors)[:3]: + truncated = error[:250] + "..." if len(error) > 250 else error + print(f" - {truncated}") + return False, new_errors + else: + if verbose: + print( + f"PASSED - No new errors (original had {len(current_errors)} errors)" + ) + return True, set() + + def validate_against_xsd(self): + new_errors = [] + original_error_count = 0 + valid_count = 0 + skipped_count = 0 + + for xml_file in self.xml_files: + relative_path = str(xml_file.relative_to(self.unpacked_dir)) + is_valid, new_file_errors = self.validate_file_against_xsd( + xml_file, verbose=False + ) + + if is_valid is None: + skipped_count += 1 + continue + elif is_valid and not new_file_errors: + valid_count += 1 + continue + elif is_valid: + original_error_count += 1 + valid_count += 1 + continue + + new_errors.append(f" {relative_path}: {len(new_file_errors)} new error(s)") + for error in list(new_file_errors)[:3]: + new_errors.append( + f" - {error[:250]}..." 
if len(error) > 250 else f" - {error}" + ) + + if self.verbose: + print(f"Validated {len(self.xml_files)} files:") + print(f" - Valid: {valid_count}") + print(f" - Skipped (no schema): {skipped_count}") + if original_error_count: + print(f" - With original errors (ignored): {original_error_count}") + print( + f" - With NEW errors: {len(new_errors) > 0 and len([e for e in new_errors if not e.startswith(' ')]) or 0}" + ) + + if new_errors: + print("\nFAILED - Found NEW validation errors:") + for error in new_errors: + print(error) + return False + else: + if self.verbose: + print("\nPASSED - No new XSD validation errors introduced") + return True + + def _get_schema_path(self, xml_file): + if xml_file.name in self.SCHEMA_MAPPINGS: + return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.name] + + if xml_file.suffix == ".rels": + return self.schemas_dir / self.SCHEMA_MAPPINGS[".rels"] + + if "charts/" in str(xml_file) and xml_file.name.startswith("chart"): + return self.schemas_dir / self.SCHEMA_MAPPINGS["chart"] + + if "theme/" in str(xml_file) and xml_file.name.startswith("theme"): + return self.schemas_dir / self.SCHEMA_MAPPINGS["theme"] + + if xml_file.parent.name in self.MAIN_CONTENT_FOLDERS: + return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.parent.name] + + return None + + def _clean_ignorable_namespaces(self, xml_doc): + xml_string = lxml.etree.tostring(xml_doc, encoding="unicode") + xml_copy = lxml.etree.fromstring(xml_string) + + for elem in xml_copy.iter(): + attrs_to_remove = [] + + for attr in elem.attrib: + if "{" in attr: + ns = attr.split("}")[0][1:] + if ns not in self.OOXML_NAMESPACES: + attrs_to_remove.append(attr) + + for attr in attrs_to_remove: + del elem.attrib[attr] + + self._remove_ignorable_elements(xml_copy) + + return lxml.etree.ElementTree(xml_copy) + + def _remove_ignorable_elements(self, root): + elements_to_remove = [] + + for elem in list(root): + if not hasattr(elem, "tag") or callable(elem.tag): + continue + + tag_str = 
str(elem.tag) + if tag_str.startswith("{"): + ns = tag_str.split("}")[0][1:] + if ns not in self.OOXML_NAMESPACES: + elements_to_remove.append(elem) + continue + + self._remove_ignorable_elements(elem) + + for elem in elements_to_remove: + root.remove(elem) + + def _preprocess_for_mc_ignorable(self, xml_doc): + root = xml_doc.getroot() + + if f"{{{self.MC_NAMESPACE}}}Ignorable" in root.attrib: + del root.attrib[f"{{{self.MC_NAMESPACE}}}Ignorable"] + + return xml_doc + + def _validate_single_file_xsd(self, xml_file, base_path): + schema_path = self._get_schema_path(xml_file) + if not schema_path: + return None, None + + try: + with open(schema_path, "rb") as xsd_file: + parser = lxml.etree.XMLParser() + xsd_doc = lxml.etree.parse( + xsd_file, parser=parser, base_url=str(schema_path) + ) + schema = lxml.etree.XMLSchema(xsd_doc) + + with open(xml_file, "r") as f: + xml_doc = lxml.etree.parse(f) + + xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc) + xml_doc = self._preprocess_for_mc_ignorable(xml_doc) + + relative_path = xml_file.relative_to(base_path) + if ( + relative_path.parts + and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS + ): + xml_doc = self._clean_ignorable_namespaces(xml_doc) + + if schema.validate(xml_doc): + return True, set() + else: + errors = set() + for error in schema.error_log: + errors.add(error.message) + return False, errors + + except Exception as e: + return False, {str(e)} + + def _get_original_file_errors(self, xml_file): + if self.original_file is None: + return set() + + import tempfile + import zipfile + + xml_file = Path(xml_file).resolve() + unpacked_dir = self.unpacked_dir.resolve() + relative_path = xml_file.relative_to(unpacked_dir) + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + with zipfile.ZipFile(self.original_file, "r") as zip_ref: + zip_ref.extractall(temp_path) + + original_xml_file = temp_path / relative_path + + if not original_xml_file.exists(): + return set() + + 
is_valid, errors = self._validate_single_file_xsd( + original_xml_file, temp_path + ) + return errors if errors else set() + + def _remove_template_tags_from_text_nodes(self, xml_doc): + warnings = [] + template_pattern = re.compile(r"\{\{[^}]*\}\}") + + xml_string = lxml.etree.tostring(xml_doc, encoding="unicode") + xml_copy = lxml.etree.fromstring(xml_string) + + def process_text_content(text, content_type): + if not text: + return text + matches = list(template_pattern.finditer(text)) + if matches: + for match in matches: + warnings.append( + f"Found template tag in {content_type}: {match.group()}" + ) + return template_pattern.sub("", text) + return text + + for elem in xml_copy.iter(): + if not hasattr(elem, "tag") or callable(elem.tag): + continue + tag_str = str(elem.tag) + if tag_str.endswith("}t") or tag_str == "t": + continue + + elem.text = process_text_content(elem.text, "text content") + elem.tail = process_text_content(elem.tail, "tail content") + + return lxml.etree.ElementTree(xml_copy), warnings + + +if __name__ == "__main__": + raise RuntimeError("This module should not be run directly.") diff --git a/agentskills/docx/scripts/office/validators/docx.py b/agentskills/docx/scripts/office/validators/docx.py new file mode 100644 index 0000000..fec405e --- /dev/null +++ b/agentskills/docx/scripts/office/validators/docx.py @@ -0,0 +1,446 @@ +""" +Validator for Word document XML files against XSD schemas. 
+""" + +import random +import re +import tempfile +import zipfile + +import defusedxml.minidom +import lxml.etree + +from .base import BaseSchemaValidator + + +class DOCXSchemaValidator(BaseSchemaValidator): + + WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml" + W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid" + + ELEMENT_RELATIONSHIP_TYPES = {} + + def validate(self): + if not self.validate_xml(): + return False + + all_valid = True + if not self.validate_namespaces(): + all_valid = False + + if not self.validate_unique_ids(): + all_valid = False + + if not self.validate_file_references(): + all_valid = False + + if not self.validate_content_types(): + all_valid = False + + if not self.validate_against_xsd(): + all_valid = False + + if not self.validate_whitespace_preservation(): + all_valid = False + + if not self.validate_deletions(): + all_valid = False + + if not self.validate_insertions(): + all_valid = False + + if not self.validate_all_relationship_ids(): + all_valid = False + + if not self.validate_id_constraints(): + all_valid = False + + if not self.validate_comment_markers(): + all_valid = False + + self.compare_paragraph_counts() + + return all_valid + + def validate_whitespace_preservation(self): + errors = [] + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + + for elem in root.iter(f"{{{self.WORD_2006_NAMESPACE}}}t"): + if elem.text: + text = elem.text + if re.search(r"^[ \t\n\r]", text) or re.search( + r"[ \t\n\r]$", text + ): + xml_space_attr = f"{{{self.XML_NAMESPACE}}}space" + if ( + xml_space_attr not in elem.attrib + or elem.attrib[xml_space_attr] != "preserve" + ): + text_preview = ( + repr(text)[:50] + "..." 
+ if len(repr(text)) > 50 + else repr(text) + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: w:t element with whitespace missing xml:space='preserve': {text_preview}" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} whitespace preservation violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All whitespace is properly preserved") + return True + + def validate_deletions(self): + errors = [] + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + namespaces = {"w": self.WORD_2006_NAMESPACE} + + for t_elem in root.xpath(".//w:del//w:t", namespaces=namespaces): + if t_elem.text: + text_preview = ( + repr(t_elem.text)[:50] + "..." + if len(repr(t_elem.text)) > 50 + else repr(t_elem.text) + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {t_elem.sourceline}: found within : {text_preview}" + ) + + for instr_elem in root.xpath( + ".//w:del//w:instrText", namespaces=namespaces + ): + text_preview = ( + repr(instr_elem.text or "")[:50] + "..." 
+ if len(repr(instr_elem.text or "")) > 50 + else repr(instr_elem.text or "") + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {instr_elem.sourceline}: found within (use ): {text_preview}" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} deletion validation violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - No w:t elements found within w:del elements") + return True + + def count_paragraphs_in_unpacked(self): + count = 0 + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p") + count = len(paragraphs) + except Exception as e: + print(f"Error counting paragraphs in unpacked document: {e}") + + return count + + def count_paragraphs_in_original(self): + original = self.original_file + if original is None: + return 0 + + count = 0 + + try: + with tempfile.TemporaryDirectory() as temp_dir: + with zipfile.ZipFile(original, "r") as zip_ref: + zip_ref.extractall(temp_dir) + + doc_xml_path = temp_dir + "/word/document.xml" + root = lxml.etree.parse(doc_xml_path).getroot() + + paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p") + count = len(paragraphs) + + except Exception as e: + print(f"Error counting paragraphs in original document: {e}") + + return count + + def validate_insertions(self): + errors = [] + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + namespaces = {"w": self.WORD_2006_NAMESPACE} + + invalid_elements = root.xpath( + ".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=namespaces + ) + + for elem in invalid_elements: + text_preview = ( + repr(elem.text or 
"")[:50] + "..." + if len(repr(elem.text or "")) > 50 + else repr(elem.text or "") + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: within : {text_preview}" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} insertion validation violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - No w:delText elements within w:ins elements") + return True + + def compare_paragraph_counts(self): + original_count = self.count_paragraphs_in_original() + new_count = self.count_paragraphs_in_unpacked() + + diff = new_count - original_count + diff_str = f"+{diff}" if diff > 0 else str(diff) + print(f"\nParagraphs: {original_count} → {new_count} ({diff_str})") + + def _parse_id_value(self, val: str, base: int = 16) -> int: + return int(val, base) + + def validate_id_constraints(self): + errors = [] + para_id_attr = f"{{{self.W14_NAMESPACE}}}paraId" + durable_id_attr = f"{{{self.W16CID_NAMESPACE}}}durableId" + + for xml_file in self.xml_files: + try: + for elem in lxml.etree.parse(str(xml_file)).iter(): + if val := elem.get(para_id_attr): + if self._parse_id_value(val, base=16) >= 0x80000000: + errors.append( + f" {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000" + ) + + if val := elem.get(durable_id_attr): + if xml_file.name == "numbering.xml": + try: + if self._parse_id_value(val, base=10) >= 0x7FFFFFFF: + errors.append( + f" {xml_file.name}:{elem.sourceline}: " + f"durableId={val} >= 0x7FFFFFFF" + ) + except ValueError: + errors.append( + f" {xml_file.name}:{elem.sourceline}: " + f"durableId={val} must be decimal in numbering.xml" + ) + else: + if self._parse_id_value(val, base=16) >= 0x7FFFFFFF: + errors.append( + f" {xml_file.name}:{elem.sourceline}: " + f"durableId={val} >= 0x7FFFFFFF" + ) + except Exception: + pass + + 
if errors: + print(f"FAILED - {len(errors)} ID constraint violations:") + for e in errors: + print(e) + elif self.verbose: + print("PASSED - All paraId/durableId values within constraints") + return not errors + + def validate_comment_markers(self): + errors = [] + + document_xml = None + comments_xml = None + for xml_file in self.xml_files: + if xml_file.name == "document.xml" and "word" in str(xml_file): + document_xml = xml_file + elif xml_file.name == "comments.xml": + comments_xml = xml_file + + if not document_xml: + if self.verbose: + print("PASSED - No document.xml found (skipping comment validation)") + return True + + try: + doc_root = lxml.etree.parse(str(document_xml)).getroot() + namespaces = {"w": self.WORD_2006_NAMESPACE} + + range_starts = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in doc_root.xpath( + ".//w:commentRangeStart", namespaces=namespaces + ) + } + range_ends = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in doc_root.xpath( + ".//w:commentRangeEnd", namespaces=namespaces + ) + } + references = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in doc_root.xpath( + ".//w:commentReference", namespaces=namespaces + ) + } + + orphaned_ends = range_ends - range_starts + for comment_id in sorted( + orphaned_ends, key=lambda x: int(x) if x and x.isdigit() else 0 + ): + errors.append( + f' document.xml: commentRangeEnd id="{comment_id}" has no matching commentRangeStart' + ) + + orphaned_starts = range_starts - range_ends + for comment_id in sorted( + orphaned_starts, key=lambda x: int(x) if x and x.isdigit() else 0 + ): + errors.append( + f' document.xml: commentRangeStart id="{comment_id}" has no matching commentRangeEnd' + ) + + comment_ids = set() + if comments_xml and comments_xml.exists(): + comments_root = lxml.etree.parse(str(comments_xml)).getroot() + comment_ids = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in comments_root.xpath( + ".//w:comment", namespaces=namespaces + ) + 
} + + marker_ids = range_starts | range_ends | references + invalid_refs = marker_ids - comment_ids + for comment_id in sorted( + invalid_refs, key=lambda x: int(x) if x and x.isdigit() else 0 + ): + if comment_id: + errors.append( + f' document.xml: marker id="{comment_id}" references non-existent comment' + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append(f" Error parsing XML: {e}") + + if errors: + print(f"FAILED - {len(errors)} comment marker violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All comment markers properly paired") + return True + + def repair(self) -> int: + repairs = super().repair() + repairs += self.repair_durableId() + return repairs + + def repair_durableId(self) -> int: + repairs = 0 + + for xml_file in self.xml_files: + try: + content = xml_file.read_text(encoding="utf-8") + dom = defusedxml.minidom.parseString(content) + modified = False + + for elem in dom.getElementsByTagName("*"): + if not elem.hasAttribute("w16cid:durableId"): + continue + + durable_id = elem.getAttribute("w16cid:durableId") + needs_repair = False + + if xml_file.name == "numbering.xml": + try: + needs_repair = ( + self._parse_id_value(durable_id, base=10) >= 0x7FFFFFFF + ) + except ValueError: + needs_repair = True + else: + try: + needs_repair = ( + self._parse_id_value(durable_id, base=16) >= 0x7FFFFFFF + ) + except ValueError: + needs_repair = True + + if needs_repair: + value = random.randint(1, 0x7FFFFFFE) + if xml_file.name == "numbering.xml": + new_id = str(value) + else: + new_id = f"{value:08X}" + + elem.setAttribute("w16cid:durableId", new_id) + print( + f" Repaired: {xml_file.name}: durableId {durable_id} → {new_id}" + ) + repairs += 1 + modified = True + + if modified: + xml_file.write_bytes(dom.toxml(encoding="UTF-8")) + + except Exception: + pass + + return repairs + + +if __name__ == "__main__": + raise RuntimeError("This module should not be run directly.") diff 
--git a/agentskills/docx/scripts/office/validators/pptx.py b/agentskills/docx/scripts/office/validators/pptx.py new file mode 100644 index 0000000..09842aa --- /dev/null +++ b/agentskills/docx/scripts/office/validators/pptx.py @@ -0,0 +1,275 @@ +""" +Validator for PowerPoint presentation XML files against XSD schemas. +""" + +import re + +from .base import BaseSchemaValidator + + +class PPTXSchemaValidator(BaseSchemaValidator): + + PRESENTATIONML_NAMESPACE = ( + "http://schemas.openxmlformats.org/presentationml/2006/main" + ) + + ELEMENT_RELATIONSHIP_TYPES = { + "sldid": "slide", + "sldmasterid": "slidemaster", + "notesmasterid": "notesmaster", + "sldlayoutid": "slidelayout", + "themeid": "theme", + "tablestyleid": "tablestyles", + } + + def validate(self): + if not self.validate_xml(): + return False + + all_valid = True + if not self.validate_namespaces(): + all_valid = False + + if not self.validate_unique_ids(): + all_valid = False + + if not self.validate_uuid_ids(): + all_valid = False + + if not self.validate_file_references(): + all_valid = False + + if not self.validate_slide_layout_ids(): + all_valid = False + + if not self.validate_content_types(): + all_valid = False + + if not self.validate_against_xsd(): + all_valid = False + + if not self.validate_notes_slide_references(): + all_valid = False + + if not self.validate_all_relationship_ids(): + all_valid = False + + if not self.validate_no_duplicate_slide_layouts(): + all_valid = False + + return all_valid + + def validate_uuid_ids(self): + import lxml.etree + + errors = [] + uuid_pattern = re.compile( + r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$" + ) + + for xml_file in self.xml_files: + try: + root = lxml.etree.parse(str(xml_file)).getroot() + + for elem in root.iter(): + for attr, value in elem.attrib.items(): + attr_name = attr.split("}")[-1].lower() + if attr_name == "id" or attr_name.endswith("id"): + if self._looks_like_uuid(value): + if not 
uuid_pattern.match(value): + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: ID '{value}' appears to be a UUID but contains invalid hex characters" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} UUID ID validation errors:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All UUID-like IDs contain valid hex values") + return True + + def _looks_like_uuid(self, value): + clean_value = value.strip("{}()").replace("-", "") + return len(clean_value) == 32 and all(c.isalnum() for c in clean_value) + + def validate_slide_layout_ids(self): + import lxml.etree + + errors = [] + + slide_masters = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml")) + + if not slide_masters: + if self.verbose: + print("PASSED - No slide masters found") + return True + + for slide_master in slide_masters: + try: + root = lxml.etree.parse(str(slide_master)).getroot() + + rels_file = slide_master.parent / "_rels" / f"{slide_master.name}.rels" + + if not rels_file.exists(): + errors.append( + f" {slide_master.relative_to(self.unpacked_dir)}: " + f"Missing relationships file: {rels_file.relative_to(self.unpacked_dir)}" + ) + continue + + rels_root = lxml.etree.parse(str(rels_file)).getroot() + + valid_layout_rids = set() + for rel in rels_root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ): + rel_type = rel.get("Type", "") + if "slideLayout" in rel_type: + valid_layout_rids.add(rel.get("Id")) + + for sld_layout_id in root.findall( + f".//{{{self.PRESENTATIONML_NAMESPACE}}}sldLayoutId" + ): + r_id = sld_layout_id.get( + f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id" + ) + layout_id = sld_layout_id.get("id") + + if r_id and r_id not in valid_layout_rids: + errors.append( + f" {slide_master.relative_to(self.unpacked_dir)}: " + 
f"Line {sld_layout_id.sourceline}: sldLayoutId with id='{layout_id}' " + f"references r:id='{r_id}' which is not found in slide layout relationships" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {slide_master.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} slide layout ID validation errors:") + for error in errors: + print(error) + print( + "Remove invalid references or add missing slide layouts to the relationships file." + ) + return False + else: + if self.verbose: + print("PASSED - All slide layout IDs reference valid slide layouts") + return True + + def validate_no_duplicate_slide_layouts(self): + import lxml.etree + + errors = [] + slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels")) + + for rels_file in slide_rels_files: + try: + root = lxml.etree.parse(str(rels_file)).getroot() + + layout_rels = [ + rel + for rel in root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ) + if "slideLayout" in rel.get("Type", "") + ] + + if len(layout_rels) > 1: + errors.append( + f" {rels_file.relative_to(self.unpacked_dir)}: has {len(layout_rels)} slideLayout references" + ) + + except Exception as e: + errors.append( + f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print("FAILED - Found slides with duplicate slideLayout references:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All slides have exactly one slideLayout reference") + return True + + def validate_notes_slide_references(self): + import lxml.etree + + errors = [] + notes_slide_references = {} + + slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels")) + + if not slide_rels_files: + if self.verbose: + print("PASSED - No slide relationship files found") + return True + + for rels_file in slide_rels_files: + try: + root = lxml.etree.parse(str(rels_file)).getroot() + 
+ for rel in root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ): + rel_type = rel.get("Type", "") + if "notesSlide" in rel_type: + target = rel.get("Target", "") + if target: + normalized_target = target.replace("../", "") + + slide_name = rels_file.stem.replace( + ".xml", "" + ) + + if normalized_target not in notes_slide_references: + notes_slide_references[normalized_target] = [] + notes_slide_references[normalized_target].append( + (slide_name, rels_file) + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + for target, references in notes_slide_references.items(): + if len(references) > 1: + slide_names = [ref[0] for ref in references] + errors.append( + f" Notes slide '{target}' is referenced by multiple slides: {', '.join(slide_names)}" + ) + for slide_name, rels_file in references: + errors.append(f" - {rels_file.relative_to(self.unpacked_dir)}") + + if errors: + print( + f"FAILED - Found {len([e for e in errors if not e.startswith(' ')])} notes slide reference validation errors:" + ) + for error in errors: + print(error) + print("Each slide may optionally have its own slide file.") + return False + else: + if self.verbose: + print("PASSED - All notes slide references are unique") + return True + + +if __name__ == "__main__": + raise RuntimeError("This module should not be run directly.") diff --git a/agentskills/docx/scripts/office/validators/redlining.py b/agentskills/docx/scripts/office/validators/redlining.py new file mode 100644 index 0000000..71c81b6 --- /dev/null +++ b/agentskills/docx/scripts/office/validators/redlining.py @@ -0,0 +1,247 @@ +""" +Validator for tracked changes in Word documents. 
class RedliningValidator:
    """Verify that one author's edits to a Word document are all tracked changes.

    The validator compares ``word/document.xml`` of an unpacked, edited
    document with the same part inside the original ``.docx``.  In both trees
    the author's ``w:ins`` elements are dropped and the author's ``w:del``
    elements are unwrapped; the remaining paragraph text must then be
    identical, otherwise some edit was made outside a tracked change.
    """

    def __init__(self, unpacked_dir, original_docx, verbose=False, author="Claude"):
        """Store the locations to compare and the author whose changes are checked.

        Args:
            unpacked_dir: Directory holding the unpacked, edited document.
            original_docx: Path to the original ``.docx`` archive.
            verbose: When true, ``PASSED`` messages are printed as well.
            author: Value of ``w:author`` identifying the changes to check.
        """
        self.unpacked_dir = Path(unpacked_dir)
        self.original_docx = Path(original_docx)
        self.verbose = verbose
        self.author = author
        self.namespaces = {
            "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        }

    def repair(self) -> int:
        """Return the number of repairs made; this validator repairs nothing."""
        return 0

    def validate(self):
        """Return True when every text change by ``self.author`` is tracked.

        Prints a ``FAILED - ...`` line for each failure reason and, when
        ``self.verbose`` is set, a ``PASSED - ...`` line on success.  If the
        edited document contains no tracked changes by the author, the
        comparison is skipped and the result is True.
        """
        import xml.etree.ElementTree as ET

        modified_file = self.unpacked_dir / "word" / "document.xml"
        if not modified_file.exists():
            print(f"FAILED - Modified document.xml not found at {modified_file}")
            return False

        try:
            modified_root = ET.parse(modified_file).getroot()
        except ET.ParseError as e:
            print(f"FAILED - Error parsing XML files: {e}")
            return False

        if not self._has_author_changes(modified_root):
            if self.verbose:
                print(f"PASSED - No tracked changes by {self.author} found.")
            return True

        # Only one member is needed, so read it straight from the archive
        # instead of extracting everything to a temporary directory.
        try:
            with zipfile.ZipFile(self.original_docx, "r") as zip_ref:
                original_xml = zip_ref.read("word/document.xml")
        except KeyError:
            print(
                f"FAILED - Original document.xml not found in {self.original_docx}"
            )
            return False
        except Exception as e:
            print(f"FAILED - Error unpacking original docx: {e}")
            return False

        try:
            original_root = ET.fromstring(original_xml)
        except ET.ParseError as e:
            print(f"FAILED - Error parsing XML files: {e}")
            return False

        self._remove_author_tracked_changes(original_root)
        self._remove_author_tracked_changes(modified_root)

        modified_text = self._extract_text_content(modified_root)
        original_text = self._extract_text_content(original_root)

        if modified_text != original_text:
            print(self._generate_detailed_diff(original_text, modified_text))
            return False

        if self.verbose:
            print(f"PASSED - All changes by {self.author} are properly tracked")
        return True

    def _has_author_changes(self, root):
        """Return True if ``root`` holds a ``w:ins``/``w:del`` by ``self.author``."""
        author_attr = f"{{{self.namespaces['w']}}}author"
        return any(
            elem.get(author_attr) == self.author
            for path in (".//w:del", ".//w:ins")
            for elem in root.findall(path, self.namespaces)
        )

    def _generate_detailed_diff(self, original_text, modified_text):
        """Build the multi-line failure report, including a word diff if git works."""
        error_parts = [
            f"FAILED - Document text doesn't match after removing {self.author}'s tracked changes",
            "",
            "Likely causes:",
            " 1. Modified text inside another author's <w:ins> or <w:del> tags",
            " 2. Made edits without proper tracked changes",
            " 3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion",
            "",
            "For pre-redlined documents, use correct patterns:",
            " - To reject another's INSERTION: Nest <w:del> inside their <w:ins>",
            " - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>",
            "",
        ]

        git_diff = self._get_git_word_diff(original_text, modified_text)
        if git_diff:
            error_parts.extend(["Differences:", "============", git_diff])
        else:
            error_parts.append("Unable to generate word diff (git not available)")

        return "\n".join(error_parts)

    @staticmethod
    def _diff_hunk_lines(diff_output):
        """Return the non-blank lines that follow a ``@@`` hunk header, joined by newlines."""
        content_lines = []
        in_content = False
        for line in diff_output.split("\n"):
            if line.startswith("@@"):
                in_content = True
                continue
            if in_content and line.strip():
                content_lines.append(line)
        return "\n".join(content_lines)

    def _get_git_word_diff(self, original_text, modified_text):
        """Return a ``git diff --word-diff`` of the two texts, or None.

        A character-level diff is tried first; if it yields no hunk content
        the default word-level diff is used.  This is best effort: None is
        returned when git cannot be run or produces nothing usable.
        """
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)
                original_file = temp_path / "original.txt"
                modified_file = temp_path / "modified.txt"
                original_file.write_text(original_text, encoding="utf-8")
                modified_file.write_text(modified_text, encoding="utf-8")

                for extra_args in (["--word-diff-regex=."], []):
                    result = subprocess.run(
                        [
                            "git",
                            "diff",
                            "--word-diff=plain",
                            *extra_args,
                            "-U0",
                            "--no-index",
                            str(original_file),
                            str(modified_file),
                        ],
                        capture_output=True,
                        text=True,
                    )
                    content = self._diff_hunk_lines(result.stdout)
                    if content:
                        return content
        except (OSError, subprocess.SubprocessError, UnicodeError):
            # git missing, temp files unwritable, or undecodable output:
            # the caller falls back to a message without a diff.
            pass

        return None

    def _remove_author_tracked_changes(self, root):
        """Undo ``self.author``'s tracked changes in ``root`` in place.

        The author's ``w:ins`` elements are removed with their content.  The
        author's ``w:del`` elements are replaced by their children, with every
        ``w:delText`` inside renamed to ``w:t`` so the deleted text counts as
        ordinary text again.
        """
        w = self.namespaces["w"]
        ins_tag = f"{{{w}}}ins"
        del_tag = f"{{{w}}}del"
        author_attr = f"{{{w}}}author"
        deltext_tag = f"{{{w}}}delText"
        t_tag = f"{{{w}}}t"

        for parent in root.iter():
            author_insertions = [
                child
                for child in parent
                if child.tag == ins_tag and child.get(author_attr) == self.author
            ]
            for elem in author_insertions:
                parent.remove(elem)

        for parent in root.iter():
            author_deletions = [
                (index, child)
                for index, child in enumerate(parent)
                if child.tag == del_tag and child.get(author_attr) == self.author
            ]

            # Work from the last deletion backwards so the recorded indices
            # of earlier siblings stay valid while children are spliced in.
            for del_index, del_elem in reversed(author_deletions):
                for elem in list(del_elem.iter(deltext_tag)):
                    elem.tag = t_tag

                for child in reversed(list(del_elem)):
                    parent.insert(del_index, child)
                parent.remove(del_elem)

    def _extract_text_content(self, root):
        """Return the text of all non-empty ``w:p`` paragraphs, one per line."""
        p_tag = f"{{{self.namespaces['w']}}}p"
        t_tag = f"{{{self.namespaces['w']}}}t"

        paragraphs = []
        for p_elem in root.findall(f".//{p_tag}"):
            paragraph_text = "".join(
                t_elem.text for t_elem in p_elem.findall(f".//{t_tag}") if t_elem.text
            )
            if paragraph_text:
                paragraphs.append(paragraph_text)

        return "\n".join(paragraphs)
REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/agentskills/frontend-design/SKILL.md b/agentskills/frontend-design/SKILL.md new file mode 100644 index 0000000..5be498e --- /dev/null +++ b/agentskills/frontend-design/SKILL.md @@ -0,0 +1,42 @@ +--- +name: frontend-design +description: Create distinctive, production-grade frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, artifacts, posters, or applications (examples include websites, landing pages, dashboards, React components, HTML/CSS layouts, or when styling/beautifying any web UI). Generates creative, polished code and UI design that avoids generic AI aesthetics. +license: Complete terms in LICENSE.txt +--- + +This skill guides creation of distinctive, production-grade frontend interfaces that avoid generic "AI slop" aesthetics. Implement real working code with exceptional attention to aesthetic details and creative choices. + +The user provides frontend requirements: a component, page, application, or interface to build. They may include context about the purpose, audience, or technical constraints. + +## Design Thinking + +Before coding, understand the context and commit to a BOLD aesthetic direction: +- **Purpose**: What problem does this interface solve? Who uses it? +- **Tone**: Pick an extreme: brutally minimal, maximalist chaos, retro-futuristic, organic/natural, luxury/refined, playful/toy-like, editorial/magazine, brutalist/raw, art deco/geometric, soft/pastel, industrial/utilitarian, etc. There are so many flavors to choose from. Use these for inspiration but design one that is true to the aesthetic direction. +- **Constraints**: Technical requirements (framework, performance, accessibility). +- **Differentiation**: What makes this UNFORGETTABLE? What's the one thing someone will remember? + +**CRITICAL**: Choose a clear conceptual direction and execute it with precision. 
Bold maximalism and refined minimalism both work - the key is intentionality, not intensity. + +Then implement working code (HTML/CSS/JS, React, Vue, etc.) that is: +- Production-grade and functional +- Visually striking and memorable +- Cohesive with a clear aesthetic point-of-view +- Meticulously refined in every detail + +## Frontend Aesthetics Guidelines + +Focus on: +- **Typography**: Choose fonts that are beautiful, unique, and interesting. Avoid generic fonts like Arial and Inter; opt instead for distinctive choices that elevate the frontend's aesthetics; unexpected, characterful font choices. Pair a distinctive display font with a refined body font. +- **Color & Theme**: Commit to a cohesive aesthetic. Use CSS variables for consistency. Dominant colors with sharp accents outperform timid, evenly-distributed palettes. +- **Motion**: Use animations for effects and micro-interactions. Prioritize CSS-only solutions for HTML. Use Motion library for React when available. Focus on high-impact moments: one well-orchestrated page load with staggered reveals (animation-delay) creates more delight than scattered micro-interactions. Use scroll-triggering and hover states that surprise. +- **Spatial Composition**: Unexpected layouts. Asymmetry. Overlap. Diagonal flow. Grid-breaking elements. Generous negative space OR controlled density. +- **Backgrounds & Visual Details**: Create atmosphere and depth rather than defaulting to solid colors. Add contextual effects and textures that match the overall aesthetic. Apply creative forms like gradient meshes, noise textures, geometric patterns, layered transparencies, dramatic shadows, decorative borders, custom cursors, and grain overlays. 
+ +NEVER use generic AI-generated aesthetics like overused font families (Inter, Roboto, Arial, system fonts), cliched color schemes (particularly purple gradients on white backgrounds), predictable layouts and component patterns, and cookie-cutter design that lacks context-specific character. + +Interpret creatively and make unexpected choices that feel genuinely designed for the context. No design should be the same. Vary between light and dark themes, different fonts, different aesthetics. NEVER converge on common choices (Space Grotesk, for example) across generations. + +**IMPORTANT**: Match implementation complexity to the aesthetic vision. Maximalist designs need elaborate code with extensive animations and effects. Minimalist or refined designs need restraint, precision, and careful attention to spacing, typography, and subtle details. Elegance comes from executing the vision well. + +Remember: Claude is capable of extraordinary creative work. Don't hold back, show what can truly be created when thinking outside the box and committing fully to a distinctive vision. diff --git a/agentskills/test/SKILL.md b/agentskills/test/SKILL.md new file mode 100644 index 0000000..7a7b311 --- /dev/null +++ b/agentskills/test/SKILL.md @@ -0,0 +1,15 @@ +--- +name: Test Skill +description: Validate skills loading and read_file flow. +example_prompts: + - "Use the test skill to explain how to proceed." +tools: + - read_file +tags: + - test +--- + +# Test Skill + +This skill exists to verify that the skills system can discover, list, sync, +and read SKILL.md content via read_file. Replace this content later. 
diff --git a/config/paths.py b/config/paths.py index 6ae8675..3e897ca 100644 --- a/config/paths.py +++ b/config/paths.py @@ -9,6 +9,8 @@ HOST_PROJECT_PATH = os.environ.get("HOST_PROJECT_PATH", DEFAULT_PROJECT_PATH) PROMPTS_DIR = "./prompts" DATA_DIR = "./data" LOGS_DIR = "./logs" +AGENT_SKILLS_DIR = "./agentskills" +WORKSPACE_SKILLS_DIRNAME = "skills" # 多用户空间 USER_SPACE_DIR = "./users" @@ -28,6 +30,8 @@ __all__ = [ "PROMPTS_DIR", "DATA_DIR", "LOGS_DIR", + "AGENT_SKILLS_DIR", + "WORKSPACE_SKILLS_DIRNAME", "USER_SPACE_DIR", "USERS_DB_FILE", "INVITE_CODES_FILE", diff --git a/core/main_terminal.py b/core/main_terminal.py index b8f69a6..c463713 100644 --- a/core/main_terminal.py +++ b/core/main_terminal.py @@ -58,6 +58,12 @@ from modules.personalization_manager import ( load_personalization_config, build_personalization_prompt, ) +from modules.skills_manager import ( + get_skills_catalog, + build_skills_list, + merge_enabled_skills, + build_skills_prompt, +) from modules.custom_tool_registry import CustomToolRegistry, build_default_tool_category from modules.custom_tool_executor import CustomToolExecutor try: @@ -2514,6 +2520,19 @@ class MainTerminal: {"role": "system", "content": system_prompt} ] + personalization_config = getattr(self.context_manager, "custom_personalization_config", None) or load_personalization_config(self.data_dir) + skills_catalog = get_skills_catalog() + enabled_skills = merge_enabled_skills( + personalization_config.get("enabled_skills") if isinstance(personalization_config, dict) else None, + skills_catalog, + personalization_config.get("skills_catalog_snapshot") if isinstance(personalization_config, dict) else None, + ) + skills_template = self.load_prompt("skills_system").strip() + skills_list = build_skills_list(skills_catalog, enabled_skills) + skills_prompt = build_skills_prompt(skills_template, skills_list) + if skills_prompt: + messages.append({"role": "system", "content": skills_prompt}) + workspace_system = 
self.context_manager._build_workspace_system_message(context) if workspace_system: messages.append({"role": "system", "content": workspace_system}) @@ -2544,7 +2563,6 @@ class MainTerminal: messages.append({"role": "system", "content": thinking_prompt}) # 支持按对话覆盖的个性化配置 - personalization_config = getattr(self.context_manager, "custom_personalization_config", None) or load_personalization_config(self.data_dir) personalization_block = build_personalization_prompt(personalization_config, include_header=False) if personalization_block: personalization_template = self.load_prompt("personalization").strip() diff --git a/modules/api_user_manager.py b/modules/api_user_manager.py index e074410..3cd1b31 100644 --- a/modules/api_user_manager.py +++ b/modules/api_user_manager.py @@ -124,8 +124,9 @@ class ApiUserManager: data_dir = work_root / "data" logs_dir = work_root / "logs" uploads_dir = project_path / "user_upload" + skills_dir = project_path / "skills" - for path in (project_path, data_dir, logs_dir, uploads_dir, shared_dir, prompts_dir, personalization_dir): + for path in (project_path, data_dir, logs_dir, uploads_dir, skills_dir, shared_dir, prompts_dir, personalization_dir): path.mkdir(parents=True, exist_ok=True) # 数据子目录(工作区级) diff --git a/modules/personalization_manager.py b/modules/personalization_manager.py index 99fd6b8..b4e6e26 100644 --- a/modules/personalization_manager.py +++ b/modules/personalization_manager.py @@ -33,6 +33,8 @@ DEFAULT_PERSONALIZATION_CONFIG: Dict[str, Any] = { "considerations": [], "thinking_interval": None, "disabled_tool_categories": [], + "enabled_skills": None, + "skills_catalog_snapshot": None, "default_run_mode": None, "auto_generate_title": True, "tool_intent_enabled": True, @@ -146,6 +148,16 @@ def sanitize_personalization_payload( else: base["disabled_tool_categories"] = _sanitize_tool_categories(base.get("disabled_tool_categories"), allowed_tool_categories) + if "enabled_skills" in data: + base["enabled_skills"] = 
def _sanitize_skills(value: Any) -> Optional[list]:
    """Normalize a list of skill ids.

    ``None`` is passed through unchanged, and any other non-list value
    becomes an empty list.  For a list, non-string items are dropped, each
    remaining id is stripped of surrounding whitespace, and empty or repeated
    ids are discarded while keeping first-seen order.
    """
    if value is None:
        return None
    if not isinstance(value, list):
        return []

    # A dict keeps insertion order, so it serves as an ordered set here.
    unique: dict = {}
    for entry in value:
        if isinstance(entry, str):
            name = entry.strip()
            if name:
                unique.setdefault(name, None)
    return list(unique)
def ensure_workspace_skills_dir(project_path: str | Path) -> Path:
    """Create the workspace skills directory if missing and return its resolved path."""
    root = Path(project_path).expanduser().resolve()
    skills_dir = (root / WORKSPACE_SKILLS_DIRNAME).resolve()
    skills_dir.mkdir(parents=True, exist_ok=True)
    return skills_dir


def _unquote(value: str) -> str:
    """Drop one pair of matching surrounding quotes, if present."""
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
        return value[1:-1]
    return value


def _parse_frontmatter(text: str) -> Dict[str, str]:
    """Parse the top-level ``key: value`` pairs of a ``---`` delimited header.

    This is a deliberately small subset of YAML: only unindented
    ``key: value`` lines between the opening and closing ``---`` are read.
    Indented lines, list items (``- ...``) and ``#`` comments are skipped so
    nested content containing a colon cannot create bogus keys.  Keys with an
    empty value are omitted.

    Returns:
        The parsed mapping, or ``{}`` when the text has no complete header.
    """
    lines = text.lstrip("\ufeff").splitlines()
    if not lines or lines[0].strip() != "---":
        return {}
    end_idx = next(
        (idx for idx in range(1, len(lines)) if lines[idx].strip() == "---"),
        None,
    )
    if end_idx is None:
        return {}

    meta: Dict[str, str] = {}
    for line in lines[1:end_idx]:
        if not line or line[0] in " \t-#" or ":" not in line:
            continue
        key, _, raw_value = line.partition(":")
        key = key.strip()
        # Only a matching pair of quotes is removed; an apostrophe that is
        # part of an unquoted value must survive.
        value = _unquote(raw_value.strip())
        if key and value:
            meta[key] = value
    return meta


def _is_valid_skill_id(name: str) -> bool:
    """Return True when ``name`` is non-empty and matches ``SKILL_ID_PATTERN``."""
    return bool(name and SKILL_ID_PATTERN.match(name))


def get_skills_catalog(base_dir: Optional[str] = None) -> List[Dict[str, str]]:
    """List the skills found in the global skills library.

    Every sub-directory of ``base_dir`` (default ``AGENT_SKILLS_DIR``) with a
    valid id and a ``SKILL_FILE_NAME`` file becomes one entry, in sorted
    directory order.  ``label`` and ``description`` come from the file's
    frontmatter; an unreadable file falls back to the directory name and an
    empty description.

    Returns:
        A list of ``{"id", "label", "description"}`` dicts; empty when the
        library directory does not exist.
    """
    root = Path(base_dir or AGENT_SKILLS_DIR).expanduser().resolve()
    if not root.is_dir():
        return []
    catalog: List[Dict[str, str]] = []
    for child in sorted(root.iterdir()):
        if not child.is_dir() or not _is_valid_skill_id(child.name):
            continue
        skill_file = child / SKILL_FILE_NAME
        if not skill_file.exists():
            continue
        try:
            meta = _parse_frontmatter(skill_file.read_text(encoding="utf-8"))
        except (OSError, UnicodeError):
            # Best effort: a skill with an unreadable file is still listed.
            meta = {}
        catalog.append({
            "id": child.name,
            "label": meta.get("name") or child.name,
            "description": meta.get("description") or "",
        })
    return catalog


def resolve_enabled_skills(
    enabled_skills: Optional[Sequence[str]],
    catalog: Sequence[Dict[str, str]],
) -> List[str]:
    """Return the enabled skill ids that exist in ``catalog``.

    ``None`` (or anything that is not a list/tuple) means "everything in the
    catalog".  Otherwise the given ids are stripped, de-duplicated in order
    and restricted to ids present in the catalog.
    """
    catalog_ids = [item.get("id", "") for item in catalog if item.get("id")]
    if not isinstance(enabled_skills, (list, tuple)):
        return catalog_ids
    allowed = set(catalog_ids)
    seen = set()
    resolved: List[str] = []
    for item in enabled_skills:
        if not isinstance(item, str):
            continue
        skill_id = item.strip()
        if not skill_id or skill_id in seen or skill_id not in allowed:
            continue
        resolved.append(skill_id)
        seen.add(skill_id)
    return resolved


def build_skills_list(
    catalog: Sequence[Dict[str, str]],
    enabled_skill_ids: Sequence[str],
) -> List[str]:
    """Build one ``skills/<id>[:description]`` line per enabled catalog skill."""
    if not enabled_skill_ids:
        return []
    lookup = {item.get("id"): item for item in catalog if item.get("id")}
    lines: List[str] = []
    for skill_id in enabled_skill_ids:
        meta = lookup.get(skill_id)
        if not meta:
            continue
        description = (meta.get("description") or "").strip()
        if description:
            lines.append(f"skills/{skill_id}:{description}")
        else:
            lines.append(f"skills/{skill_id}")
    return lines


def merge_enabled_skills(
    enabled_skill_ids: Optional[Sequence[str]],
    catalog: Sequence[Dict[str, str]],
    catalog_snapshot: Optional[Sequence[str]] = None,
) -> List[str]:
    """Combine the stored selection with skills added since the snapshot.

    With no stored selection every catalog skill is enabled.  Otherwise the
    selection is limited to ids still in the catalog (in catalog order), and
    catalog ids missing from a non-empty ``catalog_snapshot`` are appended so
    newly added skills start out enabled.
    """
    catalog_ids = [item.get("id") for item in catalog if item.get("id")]
    if enabled_skill_ids is None:
        base = list(catalog_ids)
    else:
        base_set = {item for item in enabled_skill_ids if isinstance(item, str)}
        base = [item for item in catalog_ids if item in base_set]
    if catalog_snapshot:
        snapshot_set = {item for item in catalog_snapshot if isinstance(item, str)}
        for item in catalog_ids:
            if item not in snapshot_set and item not in base:
                base.append(item)
    return base


def build_skills_prompt(template: str, skills_list: Sequence[str]) -> str:
    """Fill the skills prompt template.

    ``{skills_list}`` is replaced by the lines of ``skills_list``.  When the
    list is non-empty the ``[skills_empty]...[/skills_empty]`` section is
    removed; when it is empty only the two markers are removed so the
    section's text stays.  An empty template yields ``""``.
    """
    if not template:
        return ""
    list_block = "\n".join(skills_list) if skills_list else ""
    content = template
    if list_block:
        # Strip the section before substituting, so marker-like text inside a
        # skill description is never treated as template markup.
        content = re.sub(r"\[skills_empty\].*?\[/skills_empty\]\n?", "", content, flags=re.S)
        content = content.replace("{skills_list}", list_block)
    else:
        content = content.replace("{skills_list}", "")
        content = content.replace("[skills_empty]", "").replace("[/skills_empty]", "")
    return content.strip()


def sync_workspace_skills(
    project_path: str | Path,
    enabled_skills: Optional[Sequence[str]] = None,
    base_dir: Optional[str] = None,
) -> Dict[str, object]:
    """Replace the workspace skills directory with copies of the enabled skills.

    The existing workspace skills directory is deleted and recreated, then
    each enabled skill directory from the global library is copied into it.

    Returns:
        ``{"success": True, "copied", "available", "target"}`` on success, or
        ``{"success": False, "error", ...}`` when the target is not strictly
        inside the project or the copy fails.
    """
    root = Path(project_path).expanduser().resolve()
    skills_dir = (root / WORKSPACE_SKILLS_DIRNAME).resolve()
    try:
        skills_dir.relative_to(root)
    except ValueError:
        return {"success": False, "error": "skills 目录不在项目路径内"}
    if skills_dir == root:
        # The directory below is wiped with rmtree; never let that be the
        # project root itself.
        return {"success": False, "error": "skills 目录不在项目路径内"}

    ensure_agent_skills_dir(base_dir)
    catalog = get_skills_catalog(base_dir)
    resolved = resolve_enabled_skills(enabled_skills, catalog)

    try:
        if skills_dir.exists():
            shutil.rmtree(skills_dir)
        skills_dir.mkdir(parents=True, exist_ok=True)
        global_root = Path(base_dir or AGENT_SKILLS_DIR).expanduser().resolve()
        for skill_id in resolved:
            src = global_root / skill_id
            if not src.is_dir():
                continue
            shutil.copytree(src, skills_dir / skill_id)
        return {
            "success": True,
            "copied": list(resolved),
            "available": [item.get("id") for item in catalog],
            "target": str(skills_dir),
        }
    except Exception as exc:
        logger.error("同步 skills 失败: %s", exc, exc_info=True)
        return {"success": False, "error": str(exc), "target": str(skills_dir)}
b/modules/user_manager.py @@ -124,8 +124,9 @@ class UserManager: data_dir = root / "data" logs_dir = root / "logs" uploads_dir = project_path / "user_upload" + skills_dir = project_path / "skills" - for path in [project_path, data_dir, logs_dir, uploads_dir]: + for path in [project_path, data_dir, logs_dir, uploads_dir, skills_dir]: path.mkdir(parents=True, exist_ok=True) # 初始化数据子目录 diff --git a/prompts/skills_system.txt b/prompts/skills_system.txt new file mode 100644 index 0000000..c3c98ff --- /dev/null +++ b/prompts/skills_system.txt @@ -0,0 +1,4 @@ +agent skills系统已启用,以下是可用的skills(含简要说明) +{skills_list} +[skills_empty]暂无可用的skills[/skills_empty] +使用技能时,先用 run_command 查看对应 skills// 目录内容,再用 read_file 阅读 SKILL.md 或相关文件获取具体指导 diff --git a/server/chat.py b/server/chat.py index e4f5f19..4b87b45 100644 --- a/server/chat.py +++ b/server/chat.py @@ -19,6 +19,11 @@ from modules.personalization_manager import ( THINKING_INTERVAL_MIN, THINKING_INTERVAL_MAX, ) +from modules.skills_manager import ( + get_skills_catalog, + merge_enabled_skills, + sync_workspace_skills, +) from modules.upload_security import UploadSecurityError from modules.user_manager import UserWorkspace from core.web_terminal import WebTerminal @@ -169,10 +174,19 @@ def get_personalization_settings(terminal: WebTerminal, workspace: UserWorkspace if policy.get("ui_blocks", {}).get("block_personal_space"): return jsonify({"success": False, "error": "个人空间已被管理员禁用"}), 403 data = load_personalization_config(workspace.data_dir) + skills_catalog = get_skills_catalog() + enabled_skills = merge_enabled_skills( + data.get("enabled_skills"), + skills_catalog, + data.get("skills_catalog_snapshot"), + ) + data_out = dict(data) + data_out["enabled_skills"] = enabled_skills return jsonify({ "success": True, - "data": data, + "data": data_out, "tool_categories": terminal.get_tool_settings_snapshot(), + "skills_catalog": skills_catalog, "thinking_interval_default": THINKING_FAST_INTERVAL, "thinking_interval_range": { "min": 
THINKING_INTERVAL_MIN, @@ -195,6 +209,23 @@ def update_personalization_settings(terminal: WebTerminal, workspace: UserWorksp if policy.get("ui_blocks", {}).get("block_personal_space"): return jsonify({"success": False, "error": "个人空间已被管理员禁用"}), 403 config = save_personalization_config(workspace.data_dir, payload) + skills_catalog = get_skills_catalog() + enabled_skills = merge_enabled_skills( + config.get("enabled_skills"), + skills_catalog, + config.get("skills_catalog_snapshot"), + ) + stored_skills = None if len(enabled_skills) == len(skills_catalog) else enabled_skills + catalog_snapshot = [item.get("id") for item in skills_catalog if item.get("id")] + if config.get("enabled_skills") != stored_skills or config.get("skills_catalog_snapshot") != catalog_snapshot: + config = dict(config) + config["enabled_skills"] = stored_skills + config["skills_catalog_snapshot"] = catalog_snapshot + config = save_personalization_config(workspace.data_dir, config) + try: + sync_workspace_skills(workspace.project_path, enabled_skills) + except Exception as sync_exc: + debug_log(f"[Skills] 同步失败: {sync_exc}") try: terminal.apply_personalization_preferences(config) session['run_mode'] = terminal.run_mode @@ -219,10 +250,13 @@ def update_personalization_settings(terminal: WebTerminal, workspace: UserWorksp debug_log(f"广播个性化状态失败: {status_exc}") except Exception as exc: debug_log(f"应用个性化偏好失败: {exc}") + config_out = dict(config) + config_out["enabled_skills"] = enabled_skills return jsonify({ "success": True, - "data": config, + "data": config_out, "tool_categories": terminal.get_tool_settings_snapshot(), + "skills_catalog": skills_catalog, "thinking_interval_default": THINKING_FAST_INTERVAL, "thinking_interval_range": { "min": THINKING_INTERVAL_MIN, diff --git a/server/context.py b/server/context.py index ad5d5a6..f955e90 100644 --- a/server/context.py +++ b/server/context.py @@ -65,6 +65,8 @@ def get_user_resources(username: Optional[str] = None, workspace_id: Optional[st 
logs_dir.mkdir(parents=True, exist_ok=True) uploads_dir = project_path / "user_upload" uploads_dir.mkdir(parents=True, exist_ok=True) + skills_dir = project_path / "skills" + skills_dir.mkdir(parents=True, exist_ok=True) quarantine_root = Path(UPLOAD_QUARANTINE_SUBDIR).expanduser() if not quarantine_root.is_absolute(): quarantine_root = (project_path.parent / UPLOAD_QUARANTINE_SUBDIR).resolve() diff --git a/static/src/components/personalization/PersonalizationDrawer.vue b/static/src/components/personalization/PersonalizationDrawer.vue index 7c6e104..ea31953 100644 --- a/static/src/components/personalization/PersonalizationDrawer.vue +++ b/static/src/components/personalization/PersonalizationDrawer.vue @@ -463,6 +463,46 @@ +
+
+
+
+ 可用 Skills +

勾选后会注入 system prompt,并同步到工作区的 skills/ 目录。

+
+
+ +
+

暂无可用的 skills。

+
+
+
+
+
+ + {{ status }} + + + {{ error }} + +
+ +
+
+
@@ -635,6 +675,7 @@ const { saving, toggleUpdating, toolCategories, + skillsCatalog, thinkingIntervalDefault, thinkingIntervalRange, experiments @@ -644,12 +685,13 @@ const baseTabs = [ { id: 'preferences', label: '个性化设置', description: '称呼、语气与注意事项' }, { id: 'model', label: '模型偏好', description: '默认模型选择' }, { id: 'behavior', label: '模型行为', description: '工具提示与界面表现' }, + { id: 'skills', label: 'Skills', description: '可用技能开关' }, { id: 'image', label: '图片压缩', description: '发送图片的尺寸策略' }, { id: 'theme', label: '主题切换', description: '浅色 / 深色 / Claude' }, { id: 'experiments', label: '实验功能', description: 'Liquid Glass' } ] as const; -type PersonalTab = 'preferences' | 'model' | 'behavior' | 'image' | 'theme' | 'experiments' | 'admin-monitor'; +type PersonalTab = 'preferences' | 'model' | 'behavior' | 'skills' | 'image' | 'theme' | 'experiments' | 'admin-monitor'; const isAdmin = computed(() => (resourceStore.usageQuota.role || '').toLowerCase() === 'admin'); diff --git a/static/src/stores/personalization.ts b/static/src/stores/personalization.ts index c5cfd84..6d929fb 100644 --- a/static/src/stores/personalization.ts +++ b/static/src/stores/personalization.ts @@ -7,6 +7,7 @@ interface PersonalForm { auto_generate_title: boolean; tool_intent_enabled: boolean; silent_tool_disable: boolean; + enabled_skills: string[]; self_identify: string; user_name: string; profession: string; @@ -45,6 +46,7 @@ interface PersonalizationState { draggedConsiderationIndex: number | null; form: PersonalForm; toolCategories: Array<{ id: string; label: string }>; + skillsCatalog: Array<{ id: string; label: string; description?: string }>; thinkingIntervalDefault: number; thinkingIntervalRange: { min: number; max: number }; experiments: ExperimentState; @@ -60,6 +62,7 @@ const defaultForm = (): PersonalForm => ({ auto_generate_title: true, tool_intent_enabled: true, silent_tool_disable: false, + enabled_skills: [], self_identify: '', user_name: '', profession: '', @@ -127,6 +130,7 @@ export const 
usePersonalizationStore = defineStore('personalization', { draggedConsiderationIndex: null, form: defaultForm(), toolCategories: [], + skillsCatalog: [], thinkingIntervalDefault: DEFAULT_INTERVAL, thinkingIntervalRange: { ...DEFAULT_INTERVAL_RANGE }, experiments: loadExperimentState() @@ -189,6 +193,9 @@ export const usePersonalizationStore = defineStore('personalization', { auto_generate_title: data.auto_generate_title !== false, tool_intent_enabled: !!data.tool_intent_enabled, silent_tool_disable: !!data.silent_tool_disable, + enabled_skills: Array.isArray(data.enabled_skills) + ? data.enabled_skills.filter((item: unknown) => typeof item === 'string') + : [], self_identify: data.self_identify || '', user_name: data.user_name || '', profession: data.profession || '', @@ -230,6 +237,17 @@ export const usePersonalizationStore = defineStore('personalization', { } else { this.toolCategories = []; } + if (payload && Array.isArray(payload.skills_catalog)) { + this.skillsCatalog = payload.skills_catalog + .map((item: { id?: string; label?: string; description?: string } = {}) => ({ + id: typeof item.id === 'string' ? item.id : String(item.id ?? ''), + label: (item.label && String(item.label)) || (typeof item.id === 'string' ? item.id : String(item.id ?? '')), + description: typeof item.description === 'string' ? 
item.description : undefined + })) + .filter((item: { id: string }) => !!item.id); + } else { + this.skillsCatalog = []; + } }, clearFeedback() { this.status = ''; @@ -351,6 +369,22 @@ export const usePersonalizationStore = defineStore('personalization', { }; this.clearFeedback(); }, + toggleSkill(skillId: string) { + if (!skillId) { + return; + } + const current = new Set(this.form.enabled_skills || []); + if (current.has(skillId)) { + current.delete(skillId); + } else { + current.add(skillId); + } + this.form = { + ...this.form, + enabled_skills: Array.from(current) + }; + this.clearFeedback(); + }, setDefaultRunMode(mode: RunMode | null) { let target: RunMode | null = null; if (typeof mode === 'string' && RUN_MODE_OPTIONS.includes(mode as RunMode)) { diff --git a/utils/context_manager.py b/utils/context_manager.py index 2f5b829..97af613 100644 --- a/utils/context_manager.py +++ b/utils/context_manager.py @@ -325,6 +325,18 @@ class ContextManager: # 保存当前对话(如果有的话) if self.current_conversation_id and self.conversation_history: self.save_current_conversation() + + # 同步 skills(每次新对话覆盖镜像) + try: + from modules.personalization_manager import load_personalization_config + from modules.skills_manager import sync_workspace_skills + personalization_config = getattr(self, "custom_personalization_config", None) or load_personalization_config(self.data_dir) + enabled_skills = None + if isinstance(personalization_config, dict): + enabled_skills = personalization_config.get("enabled_skills") + sync_workspace_skills(self.project_path, enabled_skills) + except Exception as exc: + print(f"[Skills] 同步失败: {exc}") # 创建新对话 conversation_id = self.conversation_manager.create_conversation( @@ -1516,6 +1528,29 @@ class ContextManager: {"role": "system", "content": system_prompt} ] + try: + from modules.personalization_manager import load_personalization_config + from modules.skills_manager import ( + get_skills_catalog, + build_skills_list, + merge_enabled_skills, + build_skills_prompt, + 
) + personalization_config = getattr(self, "custom_personalization_config", None) or load_personalization_config(self.data_dir) + skills_catalog = get_skills_catalog() + enabled_skills = merge_enabled_skills( + personalization_config.get("enabled_skills") if isinstance(personalization_config, dict) else None, + skills_catalog, + personalization_config.get("skills_catalog_snapshot") if isinstance(personalization_config, dict) else None, + ) + skills_template = self.load_prompt("skills_system").strip() + skills_list = build_skills_list(skills_catalog, enabled_skills) + skills_prompt = build_skills_prompt(skills_template, skills_list) + if skills_prompt: + messages.append({"role": "system", "content": skills_prompt}) + except Exception as exc: + print(f"[Skills] 系统提示生成失败: {exc}") + workspace_system = self._build_workspace_system_message(context) if workspace_system: messages.append({"role": "system", "content": workspace_system})