- Rename skill/ to skills/ for consistency with naming conventions - Rename agent/ to agents/ and command/ to commands/ - Update AGENTS.md with all directory references - Update scripts/test-skill.sh paths - Update prompts/athena.txt documentation This aligns with best practices of using plural directory names and updates all documentation to reflect the new structure.
108 lines
3.5 KiB
Python
108 lines
3.5 KiB
Python
import json
|
|
import sys
|
|
|
|
from pypdf import PdfReader, PdfWriter
|
|
from pypdf.annotations import FreeText
|
|
|
|
|
|
# Fills a PDF by adding text annotations defined in `fields.json`. See forms.md.
|
|
|
|
|
|
def transform_coordinates(bbox, image_width, image_height, pdf_width, pdf_height):
|
|
"""Transform bounding box from image coordinates to PDF coordinates"""
|
|
# Image coordinates: origin at top-left, y increases downward
|
|
# PDF coordinates: origin at bottom-left, y increases upward
|
|
x_scale = pdf_width / image_width
|
|
y_scale = pdf_height / image_height
|
|
|
|
left = bbox[0] * x_scale
|
|
right = bbox[2] * x_scale
|
|
|
|
# Flip Y coordinates for PDF
|
|
top = pdf_height - (bbox[1] * y_scale)
|
|
bottom = pdf_height - (bbox[3] * y_scale)
|
|
|
|
return left, bottom, right, top
|
|
|
|
|
|
def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
|
|
"""Fill the PDF form with data from fields.json"""
|
|
|
|
# `fields.json` format described in forms.md.
|
|
with open(fields_json_path, "r") as f:
|
|
fields_data = json.load(f)
|
|
|
|
# Open the PDF
|
|
reader = PdfReader(input_pdf_path)
|
|
writer = PdfWriter()
|
|
|
|
# Copy all pages to writer
|
|
writer.append(reader)
|
|
|
|
# Get PDF dimensions for each page
|
|
pdf_dimensions = {}
|
|
for i, page in enumerate(reader.pages):
|
|
mediabox = page.mediabox
|
|
pdf_dimensions[i + 1] = [mediabox.width, mediabox.height]
|
|
|
|
# Process each form field
|
|
annotations = []
|
|
for field in fields_data["form_fields"]:
|
|
page_num = field["page_number"]
|
|
|
|
# Get page dimensions and transform coordinates.
|
|
page_info = next(p for p in fields_data["pages"] if p["page_number"] == page_num)
|
|
image_width = page_info["image_width"]
|
|
image_height = page_info["image_height"]
|
|
pdf_width, pdf_height = pdf_dimensions[page_num]
|
|
|
|
transformed_entry_box = transform_coordinates(
|
|
field["entry_bounding_box"],
|
|
image_width, image_height,
|
|
pdf_width, pdf_height
|
|
)
|
|
|
|
# Skip empty fields
|
|
if "entry_text" not in field or "text" not in field["entry_text"]:
|
|
continue
|
|
entry_text = field["entry_text"]
|
|
text = entry_text["text"]
|
|
if not text:
|
|
continue
|
|
|
|
font_name = entry_text.get("font", "Arial")
|
|
font_size = str(entry_text.get("font_size", 14)) + "pt"
|
|
font_color = entry_text.get("font_color", "000000")
|
|
|
|
# Font size/color seems to not work reliably across viewers:
|
|
# https://github.com/py-pdf/pypdf/issues/2084
|
|
annotation = FreeText(
|
|
text=text,
|
|
rect=transformed_entry_box,
|
|
font=font_name,
|
|
font_size=font_size,
|
|
font_color=font_color,
|
|
border_color=None,
|
|
background_color=None,
|
|
)
|
|
annotations.append(annotation)
|
|
# page_number is 0-based for pypdf
|
|
writer.add_annotation(page_number=page_num - 1, annotation=annotation)
|
|
|
|
# Save the filled PDF
|
|
with open(output_pdf_path, "wb") as output:
|
|
writer.write(output)
|
|
|
|
print(f"Successfully filled PDF form and saved to {output_pdf_path}")
|
|
print(f"Added {len(annotations)} text annotations")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) != 4:
|
|
print("Usage: fill_pdf_form_with_annotations.py [input pdf] [fields.json] [output pdf]")
|
|
sys.exit(1)
|
|
input_pdf = sys.argv[1]
|
|
fields_json = sys.argv[2]
|
|
output_pdf = sys.argv[3]
|
|
|
|
fill_pdf_form(input_pdf, fields_json, output_pdf) |