feat: Implementa job de ranking para 300k consultores

Backend:
- Adiciona Scroll API no cliente Elasticsearch para processar todos os 300k+ consultores
- Cria tabela TB_RANKING_CONSULTOR no Oracle para ranking pré-calculado
- Implementa job de processamento com APScheduler (diário às 3h)
- Adiciona endpoints: /ranking/paginado, /ranking/status, /ranking/processar, /ranking/estatisticas
- Repository Oracle com paginação eficiente via ROW_NUMBER
- Status do job com progresso em tempo real (polling)
- Leitura automática de LOBs no OracleClient

Frontend:
- Componente RankingPaginado com paginação completa
- Barra de progresso do job em tempo real
- Botão para reprocessar ranking
- Alternância entre Top N (rápido) e Ranking Completo (300k)

Infraestrutura:
- Docker compose com depends_on para garantir Oracle disponível
- Schema SQL com procedure SP_ATUALIZAR_POSICOES
- Índices otimizados para paginação
2025-12-10 01:33:00 -03:00
parent 0213a55791
commit 3ea6a4409e
19 changed files with 1596 additions and 20 deletions
--- a/backend/src/infrastructure/elasticsearch/client.py
+++ b/backend/src/infrastructure/elasticsearch/client.py
@@ -359,3 +359,129 @@ class ElasticsearchClient:
            return results
        except Exception as e:
            raise RuntimeError(f"Erro ao buscar ranking com score: {e}")
+
+    async def iniciar_scroll(self, size: int = 1000, scroll_timeout: str = "5m") -> dict:
+        """
+        Inicia um scroll para iterar por todos os documentos com atuações.
+        Retorna o scroll_id e a primeira página de resultados.
+        """
+        query = {
+            "query": {
+                "nested": {
+                    "path": "atuacoes",
+                    "query": {"exists": {"field": "atuacoes.tipo"}}
+                }
+            },
+            "_source": ["id", "dadosPessoais", "atuacoes"],
+            "size": size,
+            "sort": [{"id": "asc"}]
+        }
+
+        try:
+            response = await self.client.post(
+                f"{self.url}/{self.index}/_search?scroll={scroll_timeout}",
+                json=query,
+                timeout=120.0
+            )
+            response.raise_for_status()
+
+            data = response.json()
+            return {
+                "scroll_id": data.get("_scroll_id"),
+                "hits": [hit["_source"] for hit in data.get("hits", {}).get("hits", [])],
+                "total": data.get("hits", {}).get("total", {}).get("value", 0)
+            }
+        except Exception as e:
+            raise RuntimeError(f"Erro ao iniciar scroll: {e}")
+
+    async def continuar_scroll(self, scroll_id: str, scroll_timeout: str = "5m") -> dict:
+        """
+        Continua um scroll existente usando o scroll_id.
+        Retorna a próxima página de resultados.
+        """
+        try:
+            response = await self.client.post(
+                f"{self.url}/_search/scroll",
+                json={
+                    "scroll": scroll_timeout,
+                    "scroll_id": scroll_id
+                },
+                timeout=120.0
+            )
+            response.raise_for_status()
+
+            data = response.json()
+            return {
+                "scroll_id": data.get("_scroll_id"),
+                "hits": [hit["_source"] for hit in data.get("hits", {}).get("hits", [])]
+            }
+        except Exception as e:
+            raise RuntimeError(f"Erro ao continuar scroll: {e}")
+
+    async def limpar_scroll(self, scroll_id: str) -> None:
+        """
+        Limpa o contexto de scroll após uso.
+        """
+        try:
+            await self.client.delete(
+                f"{self.url}/_search/scroll",
+                json={"scroll_id": scroll_id},
+                timeout=30.0
+            )
+        except Exception:
+            pass
+
+    async def buscar_todos_consultores(self, callback, batch_size: int = 1000):
+        """
+        Itera por TODOS os consultores do índice usando Scroll API.
+        Chama callback(batch, progress) para cada batch de documentos.
+
+        Args:
+            callback: função assíncrona que recebe (docs: list, progress: dict)
+            batch_size: tamanho do batch (padrão 1000)
+
+        Progress dict contém:
+            - total: total de documentos
+            - processados: documentos processados até agora
+            - batch_atual: número do batch atual
+            - percentual: percentual de progresso (0-100)
+        """
+        scroll_id = None
+        try:
+            result = await self.iniciar_scroll(size=batch_size)
+            scroll_id = result["scroll_id"]
+            total = result["total"]
+            hits = result["hits"]
+
+            processados = 0
+            batch_atual = 1
+
+            while hits:
+                processados += len(hits)
+                percentual = int((processados / total) * 100) if total > 0 else 100
+
+                progress = {
+                    "total": total,
+                    "processados": processados,
+                    "batch_atual": batch_atual,
+                    "percentual": percentual
+                }
+
+                await callback(hits, progress)
+
+                if len(hits) < batch_size:
+                    break
+
+                result = await self.continuar_scroll(scroll_id)
+                scroll_id = result["scroll_id"]
+                hits = result["hits"]
+                batch_atual += 1
+
+            return {
+                "total": total,
+                "processados": processados,
+                "batches": batch_atual
+            }
+        finally:
+            if scroll_id:
+                await self.limpar_scroll(scroll_id)
--- a/backend/src/infrastructure/oracle/client.py
+++ b/backend/src/infrastructure/oracle/client.py
@@ -57,8 +57,19 @@ class OracleClient:
                cursor.execute(query, params or {})
                columns = [col[0] for col in cursor.description]
                rows = cursor.fetchall()
+
+                results = []
+                for row in rows:
+                    row_dict = {}
+                    for i, col in enumerate(columns):
+                        value = row[i]
+                        if hasattr(value, 'read'):
+                            value = value.read()
+                        row_dict[col] = value
+                    results.append(row_dict)
+
                cursor.close()
-                return [dict(zip(columns, row)) for row in rows]
+                return results
        except Exception as e:
            print(f"AVISO Oracle: falha ao executar query: {e}")
            self._connected = False
--- a/backend/src/infrastructure/oracle/ranking_repository.py
+++ b/backend/src/infrastructure/oracle/ranking_repository.py
@@ -0,0 +1,341 @@
+from typing import List, Optional, Dict, Any
+from datetime import datetime
+import json
+
+from ...domain.entities.consultor_ranking import ConsultorRanking
+from .client import OracleClient
+
+
+class RankingOracleRepository:
+    def __init__(self, oracle_client: OracleClient) -> None:
+        """Keep the Oracle client that every method of this repository uses."""
+        self.client = oracle_client
+
+    def inserir_batch(self, consultores: List[Dict[str, Any]]) -> int:
+        """
+        Insere ou atualiza um batch de consultores usando MERGE.
+        Retorna o número de registros processados.
+        """
+        if not consultores:
+            return 0
+
+        merge_sql = """
+            MERGE INTO TB_RANKING_CONSULTOR t
+            USING (
+                SELECT
+                    :id_pessoa AS ID_PESSOA,
+                    :nome AS NOME,
+                    :pontuacao_total AS PONTUACAO_TOTAL,
+                    :componente_a AS COMPONENTE_A,
+                    :componente_b AS COMPONENTE_B,
+                    :componente_c AS COMPONENTE_C,
+                    :componente_d AS COMPONENTE_D,
+                    :ativo AS ATIVO,
+                    :anos_atuacao AS ANOS_ATUACAO,
+                    :json_detalhes AS JSON_DETALHES
+                FROM DUAL
+            ) s
+            ON (t.ID_PESSOA = s.ID_PESSOA)
+            WHEN MATCHED THEN
+                UPDATE SET
+                    t.NOME = s.NOME,
+                    t.PONTUACAO_TOTAL = s.PONTUACAO_TOTAL,
+                    t.COMPONENTE_A = s.COMPONENTE_A,
+                    t.COMPONENTE_B = s.COMPONENTE_B,
+                    t.COMPONENTE_C = s.COMPONENTE_C,
+                    t.COMPONENTE_D = s.COMPONENTE_D,
+                    t.ATIVO = s.ATIVO,
+                    t.ANOS_ATUACAO = s.ANOS_ATUACAO,
+                    t.DT_CALCULO = CURRENT_TIMESTAMP,
+                    t.JSON_DETALHES = s.JSON_DETALHES
+            WHEN NOT MATCHED THEN
+                INSERT (
+                    ID_PESSOA, NOME, PONTUACAO_TOTAL,
+                    COMPONENTE_A, COMPONENTE_B, COMPONENTE_C, COMPONENTE_D,
+                    ATIVO, ANOS_ATUACAO, JSON_DETALHES, DT_CALCULO
+                )
+                VALUES (
+                    s.ID_PESSOA, s.NOME, s.PONTUACAO_TOTAL,
+                    s.COMPONENTE_A, s.COMPONENTE_B, s.COMPONENTE_C, s.COMPONENTE_D,
+                    s.ATIVO, s.ANOS_ATUACAO, s.JSON_DETALHES, CURRENT_TIMESTAMP
+                )
+        """
+
+        with self.client.get_connection() as conn:
+            cursor = conn.cursor()
+            try:
+                for consultor in consultores:
+                    params = {
+                        "id_pessoa": consultor["id_pessoa"],
+                        "nome": consultor["nome"],
+                        "pontuacao_total": consultor["pontuacao_total"],
+                        "componente_a": consultor["componente_a"],
+                        "componente_b": consultor["componente_b"],
+                        "componente_c": consultor["componente_c"],
+                        "componente_d": consultor["componente_d"],
+                        "ativo": "S" if consultor["ativo"] else "N",
+                        "anos_atuacao": consultor["anos_atuacao"],
+                        "json_detalhes": json.dumps(consultor.get("detalhes", {}), ensure_ascii=False)
+                    }
+                    cursor.execute(merge_sql, params)
+
+                conn.commit()
+                return len(consultores)
+            except Exception as e:
+                conn.rollback()
+                raise RuntimeError(f"Erro ao inserir batch no Oracle: {e}")
+            finally:
+                cursor.close()
+
+    def buscar_paginado(
+        self,
+        page: int = 1,
+        size: int = 50,
+        filtro_ativo: Optional[bool] = None
+    ) -> List[ConsultorRanking]:
+        """
+        Busca ranking paginado ordenado por posição.
+        """
+        offset = (page - 1) * size
+        limit_end = offset + size
+
+        where_clause = ""
+        params = {}
+
+        if filtro_ativo is not None:
+            where_clause = "AND ATIVO = :ativo"
+            params["ativo"] = "S" if filtro_ativo else "N"
+
+        query = f"""
+            SELECT * FROM (
+                SELECT
+                    ID_PESSOA,
+                    NOME,
+                    POSICAO,
+                    PONTUACAO_TOTAL,
+                    COMPONENTE_A,
+                    COMPONENTE_B,
+                    COMPONENTE_C,
+                    COMPONENTE_D,
+                    ATIVO,
+                    ANOS_ATUACAO,
+                    DT_CALCULO,
+                    JSON_DETALHES,
+                    ROW_NUMBER() OVER (ORDER BY POSICAO NULLS LAST, PONTUACAO_TOTAL DESC) AS RN
+                FROM TB_RANKING_CONSULTOR
+                WHERE 1=1 {where_clause}
+            )
+            WHERE RN > {offset} AND RN <= {limit_end}
+        """
+
+        results = self.client.executar_query(query, params)
+
+        consultores = []
+        for r in results:
+            json_det = r["JSON_DETALHES"]
+            if hasattr(json_det, "read"):
+                json_det = json_det.read()
+            else:
+                json_det = str(json_det) if json_det else "{}"
+
+            consultores.append(
+                ConsultorRanking(
+                    id_pessoa=r["ID_PESSOA"],
+                    nome=r["NOME"],
+                    posicao=r["POSICAO"],
+                    pontuacao_total=float(r["PONTUACAO_TOTAL"]),
+                    componente_a=float(r["COMPONENTE_A"]),
+                    componente_b=float(r["COMPONENTE_B"]),
+                    componente_c=float(r["COMPONENTE_C"]),
+                    componente_d=float(r["COMPONENTE_D"]),
+                    ativo=r["ATIVO"] == "S",
+                    anos_atuacao=float(r["ANOS_ATUACAO"]),
+                    dt_calculo=r["DT_CALCULO"],
+                    json_detalhes=json_det
+                )
+            )
+
+        return consultores
+
+    def contar_total(self, filtro_ativo: Optional[bool] = None) -> int:
+        """
+        Conta total de consultores no ranking.
+        """
+        where_clause = ""
+        params = {}
+
+        if filtro_ativo is not None:
+            where_clause = "WHERE ATIVO = :ativo"
+            params["ativo"] = "S" if filtro_ativo else "N"
+
+        query = f"SELECT COUNT(*) AS TOTAL FROM TB_RANKING_CONSULTOR {where_clause}"
+        results = self.client.executar_query(query, params)
+
+        return results[0]["TOTAL"] if results else 0
+
+    def buscar_por_id(self, id_pessoa: int) -> Optional[ConsultorRanking]:
+        """
+        Busca consultor específico com sua posição no ranking.
+        """
+        query = """
+            SELECT
+                ID_PESSOA,
+                NOME,
+                POSICAO,
+                PONTUACAO_TOTAL,
+                COMPONENTE_A,
+                COMPONENTE_B,
+                COMPONENTE_C,
+                COMPONENTE_D,
+                ATIVO,
+                ANOS_ATUACAO,
+                DT_CALCULO,
+                JSON_DETALHES
+            FROM TB_RANKING_CONSULTOR
+            WHERE ID_PESSOA = :id_pessoa
+        """
+
+        results = self.client.executar_query(query, {"id_pessoa": id_pessoa})
+
+        if not results:
+            return None
+
+        r = results[0]
+
+        json_det = r["JSON_DETALHES"]
+        if hasattr(json_det, "read"):
+            json_det = json_det.read()
+        else:
+            json_det = str(json_det) if json_det else "{}"
+
+        return ConsultorRanking(
+            id_pessoa=r["ID_PESSOA"],
+            nome=r["NOME"],
+            posicao=r["POSICAO"],
+            pontuacao_total=float(r["PONTUACAO_TOTAL"]),
+            componente_a=float(r["COMPONENTE_A"]),
+            componente_b=float(r["COMPONENTE_B"]),
+            componente_c=float(r["COMPONENTE_C"]),
+            componente_d=float(r["COMPONENTE_D"]),
+            ativo=r["ATIVO"] == "S",
+            anos_atuacao=float(r["ANOS_ATUACAO"]),
+            dt_calculo=r["DT_CALCULO"],
+            json_detalhes=json_det
+        )
+
+    def atualizar_posicoes(self) -> None:
+        """
+        Chama a procedure SP_ATUALIZAR_POSICOES para recalcular as posições.
+        """
+        with self.client.get_connection() as conn:
+            cursor = conn.cursor()
+            try:
+                cursor.callproc("SP_ATUALIZAR_POSICOES")
+                conn.commit()
+            except Exception as e:
+                conn.rollback()
+                raise RuntimeError(f"Erro ao atualizar posições: {e}")
+            finally:
+                cursor.close()
+
+    def obter_estatisticas(self) -> Dict[str, Any]:
+        """
+        Retorna estatísticas do ranking.
+        """
+        query = """
+            SELECT
+                COUNT(*) AS TOTAL_CONSULTORES,
+                COUNT(CASE WHEN ATIVO = 'S' THEN 1 END) AS TOTAL_ATIVOS,
+                COUNT(CASE WHEN ATIVO = 'N' THEN 1 END) AS TOTAL_INATIVOS,
+                MAX(DT_CALCULO) AS ULTIMA_ATUALIZACAO,
+                AVG(PONTUACAO_TOTAL) AS PONTUACAO_MEDIA,
+                MAX(PONTUACAO_TOTAL) AS PONTUACAO_MAXIMA,
+                MIN(PONTUACAO_TOTAL) AS PONTUACAO_MINIMA,
+                AVG(COMPONENTE_A) AS MEDIA_COMP_A,
+                AVG(COMPONENTE_B) AS MEDIA_COMP_B,
+                AVG(COMPONENTE_C) AS MEDIA_COMP_C,
+                AVG(COMPONENTE_D) AS MEDIA_COMP_D
+            FROM TB_RANKING_CONSULTOR
+        """
+
+        results = self.client.executar_query(query)
+
+        if not results:
+            return {}
+
+        r = results[0]
+        return {
+            "total_consultores": r["TOTAL_CONSULTORES"],
+            "total_ativos": r["TOTAL_ATIVOS"],
+            "total_inativos": r["TOTAL_INATIVOS"],
+            "ultima_atualizacao": r["ULTIMA_ATUALIZACAO"].isoformat() if r["ULTIMA_ATUALIZACAO"] else None,
+            "pontuacao_media": float(r["PONTUACAO_MEDIA"]) if r["PONTUACAO_MEDIA"] else 0,
+            "pontuacao_maxima": float(r["PONTUACAO_MAXIMA"]) if r["PONTUACAO_MAXIMA"] else 0,
+            "pontuacao_minima": float(r["PONTUACAO_MINIMA"]) if r["PONTUACAO_MINIMA"] else 0,
+            "media_componentes": {
+                "a": float(r["MEDIA_COMP_A"]) if r["MEDIA_COMP_A"] else 0,
+                "b": float(r["MEDIA_COMP_B"]) if r["MEDIA_COMP_B"] else 0,
+                "c": float(r["MEDIA_COMP_C"]) if r["MEDIA_COMP_C"] else 0,
+                "d": float(r["MEDIA_COMP_D"]) if r["MEDIA_COMP_D"] else 0
+            }
+        }
+
+    def obter_distribuicao(self) -> List[Dict[str, Any]]:
+        """
+        Retorna distribuição de consultores por faixa de pontuação.
+        """
+        query = """
+            SELECT
+                CASE
+                    WHEN PONTUACAO_TOTAL >= 800 THEN '800+'
+                    WHEN PONTUACAO_TOTAL >= 600 THEN '600-799'
+                    WHEN PONTUACAO_TOTAL >= 400 THEN '400-599'
+                    WHEN PONTUACAO_TOTAL >= 200 THEN '200-399'
+                    ELSE '0-199'
+                END AS FAIXA,
+                COUNT(*) AS QUANTIDADE,
+                ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM TB_RANKING_CONSULTOR), 2) AS PERCENTUAL
+            FROM TB_RANKING_CONSULTOR
+            GROUP BY
+                CASE
+                    WHEN PONTUACAO_TOTAL >= 800 THEN '800+'
+                    WHEN PONTUACAO_TOTAL >= 600 THEN '600-799'
+                    WHEN PONTUACAO_TOTAL >= 400 THEN '400-599'
+                    WHEN PONTUACAO_TOTAL >= 200 THEN '200-399'
+                    ELSE '0-199'
+                END
+            ORDER BY
+                CASE
+                    WHEN FAIXA = '800+' THEN 1
+                    WHEN FAIXA = '600-799' THEN 2
+                    WHEN FAIXA = '400-599' THEN 3
+                    WHEN FAIXA = '200-399' THEN 4
+                    ELSE 5
+                END
+        """
+
+        results = self.client.executar_query(query)
+
+        return [
+            {
+                "faixa": r["FAIXA"],
+                "quantidade": r["QUANTIDADE"],
+                "percentual": float(r["PERCENTUAL"])
+            }
+            for r in results
+        ]
+
+    def limpar_tabela(self) -> None:
+        """
+        Limpa todos os registros da tabela de ranking.
+        Usar apenas quando for reprocessar do zero.
+        """
+        with self.client.get_connection() as conn:
+            cursor = conn.cursor()
+            try:
+                cursor.execute("DELETE FROM TB_RANKING_CONSULTOR")
+                conn.commit()
+            except Exception as e:
+                conn.rollback()
+                raise RuntimeError(f"Erro ao limpar tabela: {e}")
+            finally:
+                cursor.close()