- Rename skill/ to skills/ for consistency with naming conventions
- Rename agent/ to agents/ and command/ to commands/
- Update AGENTS.md with all directory references
- Update scripts/test-skill.sh paths
- Update prompts/athena.txt documentation

This aligns with the best practice of using plural directory names and updates all documentation to reflect the new structure.
115 lines
4.7 KiB
Python
115 lines
4.7 KiB
Python
import json
|
|
import sys
|
|
|
|
from pypdf import PdfReader, PdfWriter
|
|
|
|
from extract_form_field_info import get_field_info
|
|
|
|
|
|
# Fills fillable form fields in a PDF. See forms.md.
|
|
|
|
|
|
def fill_pdf_fields(input_pdf_path: str, fields_json_path: str, output_pdf_path: str):
    """Fill the fillable form fields of a PDF and write the result to a new file.

    The JSON file must contain a list of field entries. Every entry needs a
    ``field_id`` and a ``page`` key; entries that also carry a ``value`` key
    are written into the form. Before anything is written, each entry is
    checked against the field metadata returned by ``get_field_info``:

    * the ``field_id`` must exist in the PDF,
    * the ``page`` must match the page recorded for that field,
    * the ``value`` (if present) must pass ``validation_error_for_field_value``.

    All problems are printed, and the process then exits without writing any
    output, so the caller sees every error in a single run.

    Args:
        input_pdf_path: Path of the PDF containing the form to fill.
        fields_json_path: Path of the JSON file describing the values to set.
        output_pdf_path: Path the filled PDF is written to.

    Raises:
        SystemExit: With status 1 if any field entry fails validation.
        KeyError: If an entry lacks the ``field_id`` or ``page`` key.
    """
    # JSON text is UTF-8 by specification, so decode it explicitly instead of
    # relying on the platform's locale encoding.
    with open(fields_json_path, encoding="utf-8") as f:
        fields = json.load(f)

    # Group by page number.
    fields_by_page = {}
    for field in fields:
        if "value" not in field:
            continue
        field_id = field["field_id"]
        page = field["page"]
        fields_by_page.setdefault(page, {})[field_id] = field["value"]

    reader = PdfReader(input_pdf_path)

    has_error = False
    field_info = get_field_info(reader)
    fields_by_ids = {info["field_id"]: info for info in field_info}
    for field in fields:
        existing_field = fields_by_ids.get(field["field_id"])
        if not existing_field:
            has_error = True
            print(f"ERROR: `{field['field_id']}` is not a valid field ID")
        elif field["page"] != existing_field["page"]:
            has_error = True
            print(f"ERROR: Incorrect page number for `{field['field_id']}` (got {field['page']}, expected {existing_field['page']})")
        elif "value" in field:
            err = validation_error_for_field_value(existing_field, field["value"])
            if err:
                print(err)
                has_error = True
    if has_error:
        sys.exit(1)

    writer = PdfWriter(clone_from=reader)
    for page, field_values in fields_by_page.items():
        # Page numbers in the JSON are 1-based; writer.pages is 0-based.
        writer.update_page_form_field_values(writer.pages[page - 1], field_values, auto_regenerate=False)

    # This seems to be necessary for many PDF viewers to format the form values correctly.
    # It may cause the viewer to show a "save changes" dialog even if the user doesn't make any changes.
    writer.set_need_appearances_writer(True)

    with open(output_pdf_path, "wb") as f:
        writer.write(f)
|
|
|
|
|
|
def validation_error_for_field_value(field_info, field_value):
    """Return an error message if ``field_value`` is not allowed for a field.

    Only checkbox, radio group and choice fields restrict their values; any
    other field type accepts every value.

    Args:
        field_info: Metadata dict for the field. Always needs ``type`` and
            ``field_id``; checkbox fields also need ``checked_value`` and
            ``unchecked_value``, radio groups need ``radio_options`` and
            choice fields need ``choice_options`` (lists of dicts that each
            have a ``value`` key).
        field_value: The value proposed for the field.

    Returns:
        An ``ERROR: ...`` string describing the problem, or ``None`` when the
        value is acceptable.
    """
    kind = field_info["type"]
    field_id = field_info["field_id"]

    if kind == "checkbox":
        checked_val = field_info["checked_value"]
        unchecked_val = field_info["unchecked_value"]
        if field_value == checked_val or field_value == unchecked_val:
            return None
        return f'ERROR: Invalid value "{field_value}" for checkbox field "{field_id}". The checked value is "{checked_val}" and the unchecked value is "{unchecked_val}"'

    if kind == "radio_group":
        option_values = [option["value"] for option in field_info["radio_options"]]
        if field_value in option_values:
            return None
        return f'ERROR: Invalid value "{field_value}" for radio group field "{field_id}". Valid values are: {option_values}'

    if kind == "choice":
        choice_values = [option["value"] for option in field_info["choice_options"]]
        if field_value in choice_values:
            return None
        return f'ERROR: Invalid value "{field_value}" for choice field "{field_id}". Valid values are: {choice_values}'

    # Unrestricted field types (e.g. free text) accept any value.
    return None
|
|
|
|
|
|
# pypdf (at least version 5.7.0) has a bug when setting the value for a selection list field.
# In _writer.py around line 966:
#
#     if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
#         txt = "\n".join(annotation.get_inherited(FA.Opt, []))
#
# The problem is that for selection lists, `get_inherited` returns a list of two-element lists like
# [["value1", "Text 1"], ["value2", "Text 2"], ...]
# This causes `join` to throw a TypeError because it expects an iterable of strings.
# The horrible workaround is to patch `get_inherited` to return a list of the value strings.
# We call the original method and adjust the return value only if the argument to `get_inherited`
# is `FA.Opt` and if the return value is a list of two-element lists.
def monkeypatch_pydpf_method():
    """Patch ``DictionaryObject.get_inherited`` to flatten selection-list options.

    After this call, ``get_inherited`` behaves as before except when the
    requested key is ``FieldDictionaryAttributes.Opt`` and the result is a
    list whose items are all two-element lists; in that case only the first
    element of each pair is returned.

    The patch is installed on the class, so it affects every
    ``DictionaryObject`` in the process. Calling this function again is a
    no-op, so the wrapper is never stacked on top of itself.
    """
    from pypdf.generic import DictionaryObject
    from pypdf.constants import FieldDictionaryAttributes

    original_get_inherited = DictionaryObject.get_inherited

    # Already patched by an earlier call: wrapping again would only add
    # another layer of indirection to every lookup.
    if getattr(original_get_inherited, "_flattens_opt_pairs", False):
        return

    def patched_get_inherited(self, key: str, default=None):
        result = original_get_inherited(self, key, default)
        if (
            key == FieldDictionaryAttributes.Opt
            and isinstance(result, list)
            and all(isinstance(v, list) and len(v) == 2 for v in result)
        ):
            # Keep only the export value from each [value, display text] pair.
            result = [r[0] for r in result]
        return result

    # Marker checked above to detect that the patch is already installed.
    patched_get_inherited._flattens_opt_pairs = True
    DictionaryObject.get_inherited = patched_get_inherited
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: exactly three positional arguments are required.
    if len(sys.argv) != 4:
        print("Usage: fill_fillable_fields.py [input pdf] [field_values.json] [output pdf]")
        sys.exit(1)
    input_pdf, fields_json, output_pdf = sys.argv[1:]
    # Install the pypdf workaround before any form values are written.
    monkeypatch_pydpf_method()
    fill_pdf_fields(input_pdf, fields_json, output_pdf)
|