#!/usr/bin/env python3 from __future__ import annotations import argparse import datetime as dt import os import subprocess import tempfile from pathlib import Path from typing import Iterable from openpyxl import load_workbook def _excel_col_letter(index_1_based: int) -> str: result = "" n = index_1_based while n: n, rem = divmod(n - 1, 26) result = chr(ord("A") + rem) + result return result def _stringify_cell(value) -> str: if value is None: return "" if isinstance(value, (dt.datetime, dt.date)): return value.isoformat() return str(value) def _troff_escape(text: str) -> str: text = text.replace("\\", "\\\\") text = text.replace("\t", " ") text = text.replace("|", "¦") return text def _troff_field(text: str) -> str: text = _troff_escape(text) if not text: return "T{\n\\&\nT}" lines = text.splitlines() or [text] safe_lines: list[str] = [] for line in lines: if line.startswith((".", "'")): safe_lines.append(r"\&" + line) else: safe_lines.append(line) body = "\n".join(safe_lines) return f"T{{\n{body}\nT}}" def _used_bounds(ws) -> tuple[int, int]: last_row = 0 last_col = 0 for row_idx, row in enumerate(ws.iter_rows(values_only=True), start=1): if any(v not in (None, "") for v in row): last_row = row_idx for col_idx, v in enumerate(row, start=1): if v not in (None, ""): last_col = max(last_col, col_idx) return last_row, last_col def _iter_rows(ws, max_row: int, max_col: int) -> Iterable[list[str]]: for row in ws.iter_rows(min_row=1, max_row=max_row, min_col=1, max_col=max_col, values_only=True): yield [_stringify_cell(v) for v in row] def _emit_table(rows: list[list[str]], *, col_slice: slice, title: str) -> str: cols = list(range(col_slice.start or 0, col_slice.stop or len(rows[0]))) ncols = len(cols) if ncols <= 0: return "" spec = " ".join(["l"] * ncols) + "." out: list[str] = [] out.append(".LP") out.append(rf"\fB{_troff_escape(title)}\fP") out.append(".TS") out.append("tab(|) expand;") out.append(spec) header = rows[0] header_fields = [] for i in cols: header_fields.append(_troff_field(header[i])) out.append("|".join(header_fields)) for row in rows[1:]: fields = [_troff_field(row[i]) for i in cols] out.append("|".join(fields)) out.append(".TE") out.append(".LP") return "\n".join(out) + "\n" def build_troff_from_xlsx(xlsx_path: Path, generated_at: dt.datetime) -> str: wb = load_workbook(xlsx_path, data_only=False) lines: list[str] = [] lines.append('.\" Auto-generated from XLSX') lines.append(".po 1i") lines.append(".ll 6.5i") lines.append(".ps 10") lines.append(".vs 12") lines.append("") lines.append(".ps 18") lines.append(".vs 22") lines.append(".ce") lines.append(r"\fBDefinição de Ranking — AtuaCAPES\fP") lines.append(".sp 0.5") lines.append(".ps 11") lines.append(".vs 14") lines.append(".ce") lines.append("Documento gerado automaticamente") lines.append(".sp 0.5") lines.append(".ce") lines.append(_troff_escape(generated_at.strftime("%Y-%m-%d %H:%M"))) lines.append(".sp 1") lines.append(rf"Fonte: \fB{_troff_escape(str(xlsx_path))}\fP") lines.append(".LP") lines.append("Conteúdo: todas as abas da planilha, com critérios e regras conforme definidos no arquivo de entrada.") lines.append(".bp") lines.append(".ps 14") lines.append(".vs 18") lines.append(r"\fBSumário de Abas\fP") lines.append(".ps 10") lines.append(".vs 12") lines.append(".sp 0.5") for i, name in enumerate(wb.sheetnames, start=1): lines.append(rf"{i}. \fB{_troff_escape(name)}\fP") lines.append(".br") lines.append(".bp") for sheet_name in wb.sheetnames: ws = wb[sheet_name] max_row, max_col = _used_bounds(ws) if max_row == 0 or max_col == 0: continue rows = list(_iter_rows(ws, max_row=max_row, max_col=max_col)) if not rows: continue lines.append(".ps 14") lines.append(".vs 18") lines.append(rf"\fB{_troff_escape(sheet_name)}\fP") lines.append(".ps 10") lines.append(".vs 12") lines.append(".LP") lines.append(rf"Linhas: \fB{max_row}\fP — Colunas: \fB{max_col}\fP") max_cols_per_table = 8 if max_col <= max_cols_per_table: start_letter = _excel_col_letter(1) end_letter = _excel_col_letter(max_col) lines.append(_emit_table(rows, col_slice=slice(0, max_col), title=f"Colunas {start_letter}–{end_letter}")) else: start = 0 while start < max_col: end = min(start + max_cols_per_table, max_col) start_letter = _excel_col_letter(start + 1) end_letter = _excel_col_letter(end) lines.append( _emit_table( rows, col_slice=slice(start, end), title=f"Colunas {start_letter}–{end_letter}", ) ) start = end lines.append(".bp") return "\n".join(lines) def render_pdf_from_ms(ms_text: str, pdf_path: Path, *, workdir: Path) -> None: pdf_path.parent.mkdir(parents=True, exist_ok=True) with tempfile.TemporaryDirectory(dir=workdir) as td: td_path = Path(td) ms_path = td_path / "doc.ms" ps_path = td_path / "doc.ps" ms_path.write_text(ms_text, encoding="utf-8") groff_cmd = ["groff", "-Kutf8", "-Tps", "-t", str(ms_path)] ps_bytes = subprocess.check_output(groff_cmd) ps_path.write_bytes(ps_bytes) subprocess.check_call(["ps2pdf", str(ps_path), str(pdf_path)]) def main() -> int: parser = argparse.ArgumentParser(description="Gera PDF (profissional) a partir de planilha XLSX.") parser.add_argument("xlsx", type=Path, help="Caminho do XLSX de entrada") parser.add_argument( "-o", "--out", type=Path, default=Path("out/definicao_ranking_atuacapes.pdf"), help="Caminho do PDF de saída (default: out/definicao_ranking_atuacapes.pdf)", ) args = parser.parse_args() xlsx_path: Path = args.xlsx out_pdf: Path = args.out generated_at = dt.datetime.now().astimezone() ms_text = build_troff_from_xlsx(xlsx_path=xlsx_path, generated_at=generated_at) render_pdf_from_ms(ms_text=ms_text, pdf_path=out_pdf, workdir=Path(os.getcwd())) print(out_pdf) return 0 if __name__ == "__main__": raise SystemExit(main())