feat(basecamp-project): add markdown to trix converter
- Add markdown_to_trix.py with Markdown→HTML conversion for Basecamp - Implement is_basecamp_safe() for unsupported feature detection - Add convert_table_to_lists() for table→list conversion - Include CLI with --file, --output, and --check modes - Add comprehensive self-tests for all functions - Add requirements.txt with markdown dependency
This commit is contained in:
481
skills/basecamp-project/scripts/markdown_to_trix.py
Executable file
481
skills/basecamp-project/scripts/markdown_to_trix.py
Executable file
@@ -0,0 +1,481 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Markdown to Basecamp Trix HTML Converter.
|
||||
|
||||
Converts Markdown text to Basecamp-compatible HTML in Trix format.
|
||||
Handles Basecamp-specific limitations and provides safety checks.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
try:
|
||||
import markdown as markdown_lib
|
||||
MARKDOWN_AVAILABLE = True
|
||||
except ImportError:
|
||||
MARKDOWN_AVAILABLE = False
|
||||
markdown_lib = None # type: ignore[assignment-assign]
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(levelname)s: %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def markdown_to_trix(markdown_text: str) -> str:
|
||||
"""
|
||||
Convert Markdown text to Basecamp-compatible HTML (Trix format).
|
||||
|
||||
Args:
|
||||
markdown_text: The Markdown content to convert.
|
||||
|
||||
Returns:
|
||||
HTML string compatible with Basecamp Trix editor.
|
||||
|
||||
Raises:
|
||||
ImportError: If markdown library is not installed.
|
||||
"""
|
||||
if not MARKDOWN_AVAILABLE:
|
||||
raise ImportError(
|
||||
"The 'markdown' library is required. Install with: pip install markdown"
|
||||
)
|
||||
|
||||
# Strip potential HTML comments or existing HTML that might cause issues
|
||||
cleaned_text = _preprocess_markdown(markdown_text)
|
||||
|
||||
# Configure markdown with safe extensions
|
||||
md = markdown_lib.Markdown( # type: ignore[attr-defined]
|
||||
extensions=[
|
||||
"tables",
|
||||
"fenced_code",
|
||||
"codehilite",
|
||||
"nl2br",
|
||||
"sane_lists",
|
||||
],
|
||||
extension_configs={
|
||||
"codehilite": {"css_class": "highlight"},
|
||||
},
|
||||
output_format="html",
|
||||
)
|
||||
|
||||
# Convert markdown to HTML
|
||||
html = md.convert(cleaned_text)
|
||||
|
||||
# Post-process for Basecamp compatibility
|
||||
html = _post_process_html(html)
|
||||
|
||||
return html
|
||||
|
||||
|
||||
def _preprocess_markdown(text: str) -> str:
|
||||
"""Preprocess markdown text before conversion."""
|
||||
lines = text.split("\n")
|
||||
processed_lines = []
|
||||
|
||||
for line in lines:
|
||||
# Preserve checkboxes but mark them for post-processing
|
||||
# Basecamp supports checkboxes only in to-dos, not messages
|
||||
if re.match(r"^(\s*)-\s*\[[\sXx]\]\s+", line):
|
||||
# Mark checkbox lines for later handling
|
||||
processed_lines.append(line)
|
||||
else:
|
||||
processed_lines.append(line)
|
||||
|
||||
return "\n".join(processed_lines)
|
||||
|
||||
|
||||
def _post_process_html(html: str) -> str:
|
||||
"""
|
||||
Post-process HTML for Basecamp Trix compatibility.
|
||||
|
||||
- Removes horizontal rules or converts them to styled divs
|
||||
- Cleans up unnecessary tags
|
||||
- Ensures proper inline formatting
|
||||
"""
|
||||
# Remove empty paragraphs
|
||||
html = re.sub(r"<p>\s*</p>", "", html)
|
||||
|
||||
# Convert horizontal rules to Basecamp-friendly separators
|
||||
# Basecamp may not render <hr> correctly, use styled div instead
|
||||
html = re.sub(
|
||||
r"<hr\s*/?>",
|
||||
'<div style="border-bottom: 1px solid #e0e0e0; margin: 16px 0;"></div>',
|
||||
html,
|
||||
)
|
||||
|
||||
# Clean up multiple consecutive breaks
|
||||
html = re.sub(r"(<br\s*/?>){3,}", "<br><br>", html)
|
||||
|
||||
# Ensure links open in new tab (Basecamp best practice)
|
||||
html = re.sub(
|
||||
r'<a href="([^"]+)"',
|
||||
r'<a href="\1" target="_blank" rel="noopener noreferrer"',
|
||||
html,
|
||||
)
|
||||
|
||||
# Wrap code blocks properly for Basecamp
|
||||
html = re.sub(
|
||||
r'<pre><code class="([^"]*)">',
|
||||
r'<pre class="\1"><code>',
|
||||
html,
|
||||
)
|
||||
|
||||
return html
|
||||
|
||||
|
||||
def is_basecamp_safe(markdown_text: str) -> list[str]:
|
||||
"""
|
||||
Check Markdown for unsupported Basecamp features.
|
||||
|
||||
Args:
|
||||
markdown_text: The Markdown content to check.
|
||||
|
||||
Returns:
|
||||
List of warning messages for unsupported features found.
|
||||
"""
|
||||
warnings: list[str] = []
|
||||
|
||||
# Check for tables
|
||||
if re.search(r"^\|.*\|.*$", markdown_text, re.MULTILINE):
|
||||
logger.debug("Found table syntax in markdown")
|
||||
warnings.append(
|
||||
"Tables not supported: convert to structured lists"
|
||||
)
|
||||
|
||||
# Check for checkboxes (only in document context, not todo lists)
|
||||
if re.search(r"^(\s*)-\s*\[[\sXx]\]\s+", markdown_text, re.MULTILINE):
|
||||
logger.debug("Found checkbox syntax in markdown")
|
||||
warnings.append(
|
||||
"Checkboxes not supported in messages: create as todos instead"
|
||||
)
|
||||
|
||||
# Check for horizontal rules
|
||||
if re.search(r"^[-*_]{3,}\s*$", markdown_text, re.MULTILINE):
|
||||
logger.debug("Found horizontal rule syntax")
|
||||
warnings.append(
|
||||
"Horizontal rules may not render correctly"
|
||||
)
|
||||
|
||||
# Check for complex nested structures that might not render
|
||||
if markdown_text.count(" ") > 10:
|
||||
logger.debug("Found deeply nested indentation")
|
||||
warnings.append(
|
||||
"Deep nesting may not render correctly: consider flattening structure"
|
||||
)
|
||||
|
||||
return warnings
|
||||
|
||||
|
||||
def convert_table_to_lists(table_md: str) -> str:
|
||||
"""
|
||||
Convert a Markdown table to nested lists in Basecamp-friendly format.
|
||||
|
||||
Args:
|
||||
table_md: The Markdown table content (without outer pipes).
|
||||
|
||||
Returns:
|
||||
Nested list representation suitable for Basecamp.
|
||||
|
||||
Example:
|
||||
Input:
|
||||
| Name | Value |
|
||||
|------|-------|
|
||||
| Foo | Bar |
|
||||
|
||||
Output:
|
||||
Name:
|
||||
- Name: Foo
|
||||
- Value: Bar
|
||||
"""
|
||||
lines = [line.strip() for line in table_md.strip().split("\n") if line.strip()]
|
||||
|
||||
if len(lines) < 2:
|
||||
return table_md
|
||||
|
||||
# Parse header
|
||||
header_match = re.match(r"\|(.+)\|", lines[0])
|
||||
if not header_match:
|
||||
return table_md
|
||||
|
||||
headers = [h.strip() for h in header_match.group(1).split("|")]
|
||||
|
||||
# Skip separator line if present
|
||||
data_lines = lines[1:]
|
||||
if data_lines and re.match(r"^\|[-:\s|]+\|$", data_lines[0]):
|
||||
data_lines = data_lines[1:]
|
||||
|
||||
result_parts: list[str] = []
|
||||
|
||||
for line in data_lines:
|
||||
row_match = re.match(r"\|(.+)\|", line)
|
||||
if not row_match:
|
||||
continue
|
||||
|
||||
values = [v.strip() for v in row_match.group(1).split("|")]
|
||||
|
||||
# Create nested list for each row
|
||||
row_items: list[str] = []
|
||||
for header, value in zip(headers, values):
|
||||
row_items.append(f" - {header}: {value}")
|
||||
|
||||
if row_items:
|
||||
result_parts.append("\n".join(row_items))
|
||||
|
||||
return "\n\n".join(result_parts)
|
||||
|
||||
|
||||
def convert_checkbox_to_text(line: str) -> str:
|
||||
"""
|
||||
Convert a checkbox line to plain text.
|
||||
|
||||
Args:
|
||||
line: A markdown line with checkbox syntax.
|
||||
|
||||
Returns:
|
||||
Plain text representation with checkbox state indicated.
|
||||
"""
|
||||
# Match: optional_indent - [ ] or - [x] or - [X]
|
||||
match = re.match(r"^(\s*-\s*)\[[\sXx]\]\s+(.*)", line)
|
||||
if match:
|
||||
indent, text = match.groups()
|
||||
checkbox_char = "☐" if " " in match.group(0) else "☑"
|
||||
return f"{indent}{checkbox_char} {text}"
|
||||
|
||||
return line
|
||||
|
||||
|
||||
def _cli_check_mode(filename: str) -> int:
|
||||
"""
|
||||
Run in check mode: only output warnings without conversion.
|
||||
|
||||
Args:
|
||||
filename: Path to the markdown file.
|
||||
|
||||
Returns:
|
||||
Exit code: 0 if no warnings, 1 if warnings found, 2 on file error.
|
||||
"""
|
||||
try:
|
||||
with open(filename, "r", encoding="utf-8") as f:
|
||||
markdown_text = f.read()
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {filename}", file=sys.stderr)
|
||||
return 2
|
||||
except PermissionError:
|
||||
print(f"Error: Permission denied: {filename}", file=sys.stderr)
|
||||
return 2
|
||||
except Exception as e:
|
||||
print(f"Error reading file: {e}", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
warnings = is_basecamp_safe(markdown_text)
|
||||
|
||||
if warnings:
|
||||
print(f"Warnings for {filename}:")
|
||||
for warning in warnings:
|
||||
print(f" ⚠️ {warning}")
|
||||
return 1
|
||||
else:
|
||||
print(f"✅ No unsupported features found in {filename}")
|
||||
return 0
|
||||
|
||||
|
||||
def _cli_convert_mode(filename: str, output: Optional[str]) -> int:
|
||||
"""
|
||||
Run in convert mode: convert markdown to HTML.
|
||||
|
||||
Args:
|
||||
filename: Path to the markdown file.
|
||||
output: Optional path for output file.
|
||||
|
||||
Returns:
|
||||
Exit code: 0 on success, 1 on warnings, 2 on error.
|
||||
"""
|
||||
try:
|
||||
with open(filename, "r", encoding="utf-8") as f:
|
||||
markdown_text = f.read()
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {filename}", file=sys.stderr)
|
||||
return 2
|
||||
except PermissionError:
|
||||
print(f"Error: Permission denied: {filename}", file=sys.stderr)
|
||||
return 2
|
||||
except Exception as e:
|
||||
print(f"Error reading file: {e}", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
# Show warnings before conversion
|
||||
warnings = is_basecamp_safe(markdown_text)
|
||||
if warnings:
|
||||
print(f"Warnings for {filename}:")
|
||||
for warning in warnings:
|
||||
print(f" ⚠️ {warning}")
|
||||
print()
|
||||
|
||||
# Check for markdown library
|
||||
if not MARKDOWN_AVAILABLE:
|
||||
print(
|
||||
"Error: markdown library not installed.",
|
||||
file=sys.stderr
|
||||
)
|
||||
print(
|
||||
"Install with: pip install markdown",
|
||||
file=sys.stderr
|
||||
)
|
||||
return 2
|
||||
|
||||
try:
|
||||
html_output = markdown_to_trix(markdown_text)
|
||||
except Exception as e:
|
||||
print(f"Error converting markdown: {e}", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
if output:
|
||||
try:
|
||||
with open(output, "w", encoding="utf-8") as f:
|
||||
f.write(html_output)
|
||||
print(f"✅ Converted {filename} → {output}")
|
||||
except Exception as e:
|
||||
print(f"Error writing output: {e}", file=sys.stderr)
|
||||
return 2
|
||||
else:
|
||||
# Write to stdout
|
||||
print(html_output)
|
||||
|
||||
return 1 if warnings else 0
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""
|
||||
CLI entry point.
|
||||
|
||||
Returns:
|
||||
Exit code: 0=OK, 1=Warnungen, 2=Fehler.
|
||||
"""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="markdown_to_trix.py",
|
||||
description="Convert Markdown to Basecamp Trix HTML format.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--file",
|
||||
"-f",
|
||||
metavar="INPUT.md",
|
||||
help="Input Markdown file to convert",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
"-o",
|
||||
metavar="OUTPUT.html",
|
||||
help="Output HTML file (default: stdout)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--check",
|
||||
"-c",
|
||||
metavar="INPUT.md",
|
||||
help="Only check for unsupported features, don't convert",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="store_true",
|
||||
help="Enable verbose (debug) logging",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
|
||||
# Handle mutually exclusive modes
|
||||
if args.check:
|
||||
return _cli_check_mode(args.check)
|
||||
|
||||
if args.file:
|
||||
return _cli_convert_mode(args.file, args.output)
|
||||
|
||||
# No mode specified, show help
|
||||
parser.print_help()
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Parse arguments first
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--check", "-c", metavar="INPUT.md")
|
||||
parser.add_argument("--file", "-f", metavar="INPUT.md")
|
||||
parser.add_argument("--output", "-o", metavar="OUTPUT.html")
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
if args.check or args.file:
|
||||
# CLI mode with arguments
|
||||
sys.exit(main())
|
||||
|
||||
# Run self-tests when executed without arguments
|
||||
print("Running self-test...")
|
||||
|
||||
test_cases = [
|
||||
{
|
||||
"name": "Basic conversion",
|
||||
"input": "# Hello\n\n**Bold** and *italic* text.",
|
||||
"expected_warnings": 0,
|
||||
},
|
||||
{
|
||||
"name": "Table detection",
|
||||
"input": "# Test\n\n| A | B |\n|---|---|\n| 1 | 2 |",
|
||||
"expected_warnings": 1,
|
||||
},
|
||||
{
|
||||
"name": "Checkbox detection",
|
||||
"input": "# Tasks\n\n- [ ] Task 1\n- [x] Task 2",
|
||||
"expected_warnings": 1,
|
||||
},
|
||||
{
|
||||
"name": "Horizontal rule",
|
||||
"input": "# Test\n\n---\n\nContent",
|
||||
"expected_warnings": 1,
|
||||
},
|
||||
{
|
||||
"name": "Safe markdown",
|
||||
"input": "# Title\n\n- Item 1\n- Item 2\n\n**Bold** and [link](https://example.com)",
|
||||
"expected_warnings": 0,
|
||||
},
|
||||
]
|
||||
|
||||
all_passed = True
|
||||
|
||||
for test in test_cases:
|
||||
warnings = is_basecamp_safe(test["input"])
|
||||
passed = len(warnings) == test["expected_warnings"]
|
||||
status = "✅ PASS" if passed else "❌ FAIL"
|
||||
print(f" {status}: {test['name']} (warnings: {len(warnings)})")
|
||||
if not passed:
|
||||
all_passed = False
|
||||
print(f" Expected: {test['expected_warnings']}, Got: {len(warnings)}")
|
||||
print(f" Warnings: {warnings}")
|
||||
|
||||
# Test table conversion
|
||||
print("\nTesting table conversion...")
|
||||
table_md = """
|
||||
| Name | Role | Email |
|
||||
|------|------|-------|
|
||||
| Alice | Dev | alice@example.com |
|
||||
| Bob | Design | bob@example.com |
|
||||
"""
|
||||
converted = convert_table_to_lists(table_md)
|
||||
print(f" Input:\n{table_md}")
|
||||
print(f" Output:\n{converted}")
|
||||
|
||||
if all_passed:
|
||||
print("\n✅ All self-tests passed!")
|
||||
sys.exit(0)
|
||||
else:
|
||||
print("\n❌ Some self-tests failed!")
|
||||
sys.exit(1)
|
||||
2
skills/basecamp-project/scripts/requirements.txt
Normal file
2
skills/basecamp-project/scripts/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
# Markdown to Trix Converter Dependencies
|
||||
markdown>=3.4.0
|
||||
Reference in New Issue
Block a user