Source code for bigdata_client.models.document

from dataclasses import dataclass
from enum import Enum
from functools import cached_property
from typing import Optional

from pydantic import BaseModel

from bigdata_client.api.knowledge_graph import ByIdsRequest
from bigdata_client.connection_protocol import BigdataConnectionProtocol
from bigdata_client.query_type import QueryType


class DocumentSource(BaseModel):
    """The source of a document"""

    # Identifier of the source.
    key: str
    # Name of the source.
    name: str
    # Rank of the source.
    # NOTE(review): the ordering convention (lower vs. higher is better) is
    # not visible in this module -- confirm before relying on it.
    rank: int
class DocumentScope(Enum):
    """
    The type of the document.
    """

    # Each member's value is the lowercase string form of its name.
    NEWS = "news"
    FILINGS = "filings"
    TRANSCRIPTS = "transcripts"
    FILES = "files"
class DocumentSentenceEntity(BaseModel):
    """
    A detection instance of an entity in a sentence
    """

    # Identifier of the detected entity.
    key: str
    # Bounds of the detection.
    # NOTE(review): presumably offsets into the text; the unit and whether
    # ``end`` is inclusive are not visible here -- confirm.
    start: int
    end: int
    # Kind of knowledge-graph item that ``key`` refers to.
    query_type: QueryType
class DocumentSentence(BaseModel):
    """
    A reference to a sentence, given as a paragraph number and a sentence
    number.
    """

    # NOTE(review): presumably positional indices within the document; the
    # numbering base (0 or 1) is not visible here -- confirm.
    paragraph: int
    sentence: int
class DocumentChunk(BaseModel):
    """
    A chunk of text representing a contextual unit within the document
    """

    # Text content of the chunk.
    text: str
    # NOTE(review): presumably the position of this chunk within its
    # document -- confirm.
    chunk: int
    # Entity detections attached to this chunk.
    entities: list[DocumentSentenceEntity]
    # Paragraph/sentence references attached to this chunk.
    sentences: list[DocumentSentence]
    relevance: float
    sentiment: float
    # Both fields below accept ``None``.
    section_metadata: Optional[list[str]]
    # Key of the speaker; ``resolved_speaker`` turns it into a name.
    speaker: Optional[str]
    # Keeps track of the connection to Bigdata
    _api: BigdataConnectionProtocol

    def __init__(self, **data):
        """
        Build the chunk from keyword data.

        When ``data`` contains an ``"_api"`` entry it is stored on the
        instance after the regular model initialisation, so that
        ``resolved_speaker`` can use it later.

        NOTE(review): the explicit assignment is presumably needed because
        pydantic does not populate underscore-prefixed attributes from the
        input data -- confirm against the pydantic version in use.
        """
        super().__init__(**data)
        if "_api" in data:
            self._api = data["_api"]

    @cached_property
    def resolved_speaker(self):
        """
        Name of the entity referenced by ``speaker``.

        Returns ``None`` when ``speaker`` is empty/``None`` or when the
        lookup result has no entry for it. Otherwise the ``name`` of the
        matching entity is returned. The value is computed once per instance
        (``cached_property``), so at most one lookup is issued.

        NOTE(review): this needs ``_api`` to have been supplied at
        construction time; confirm every caller passes it.
        """
        if not self.speaker:
            return None

        # Ask the connection for the single entity keyed by the speaker.
        request = ByIdsRequest.model_validate(
            [{"key": self.speaker, "queryType": QueryType.ENTITY}]
        )
        by_ids_results = self._api.by_ids(request)

        speaker_entity = by_ids_results.root.get(self.speaker)
        return speaker_entity.name if speaker_entity else None