Add file_source / file_sources as optional attributes of the text / texts insert requests

This commit is contained in:
Benjamin L
2025-05-21 15:10:27 +02:00
parent 702e87492c
commit 62b536ea6f

View File

@@ -84,22 +84,33 @@ class InsertTextRequest(BaseModel):
Attributes: Attributes:
text: The text content to be inserted into the RAG system text: The text content to be inserted into the RAG system
file_source: Source of the text (optional)
""" """
text: str = Field( text: str = Field(
min_length=1, min_length=1,
description="The text to insert", description="The text to insert",
) )
file_source: str = Field(
default=None,
min_length=0,
description="File Source"
)
@field_validator("text", mode="after") @field_validator("text", mode="after")
@classmethod @classmethod
def strip_after(cls, text: str) -> str: def strip_after(cls, text: str) -> str:
return text.strip() return text.strip()
@field_validator("file_source", mode="after")
@classmethod
def strip_after(cls, file_source: str) -> str:
return file_source.strip()
class Config: class Config:
json_schema_extra = { json_schema_extra = {
"example": { "example": {
"text": "This is a sample text to be inserted into the RAG system." "text": "This is a sample text to be inserted into the RAG system.",
"file_source": "Source of the text (optional)"
} }
} }
@@ -109,24 +120,37 @@ class InsertTextsRequest(BaseModel):
Attributes: Attributes:
texts: List of text contents to be inserted into the RAG system texts: List of text contents to be inserted into the RAG system
file_sources: Sources of the texts (optional)
""" """
texts: list[str] = Field( texts: list[str] = Field(
min_length=1, min_length=1,
description="The texts to insert", description="The texts to insert",
) )
file_sources: list[str] = Field(
default=None,
min_length=0,
description="Sources of the texts"
)
@field_validator("texts", mode="after") @field_validator("texts", mode="after")
@classmethod @classmethod
def strip_after(cls, texts: list[str]) -> list[str]: def strip_after(cls, texts: list[str]) -> list[str]:
return [text.strip() for text in texts] return [text.strip() for text in texts]
@field_validator("file_sources", mode="after")
@classmethod
def strip_after(cls, file_sources: list[str]) -> list[str]:
return [file_source.strip() for file_source in file_sources]
class Config: class Config:
json_schema_extra = { json_schema_extra = {
"example": { "example": {
"texts": [ "texts": [
"This is the first text to be inserted.", "This is the first text to be inserted.",
"This is the second text to be inserted.", "This is the second text to be inserted.",
],
"file_sources": [
"First file source (optional)",
] ]
} }
} }
@@ -656,16 +680,20 @@ async def pipeline_index_files(rag: LightRAG, file_paths: List[Path]):
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
async def pipeline_index_texts(rag: LightRAG, texts: List[str]): async def pipeline_index_texts(rag: LightRAG, texts: List[str],file_sources: List[str]=None):
"""Index a list of texts """Index a list of texts
Args: Args:
rag: LightRAG instance rag: LightRAG instance
texts: The texts to index texts: The texts to index
file_sources: Sources of the texts
""" """
if not texts: if not texts:
return return
await rag.apipeline_enqueue_documents(texts) if file_sources is not None:
if len(file_sources) != 0 and len(file_sources) != len(texts):
[file_sources.append("unknown_source") for _ in range(len(file_sources),len(texts))]
await rag.apipeline_enqueue_documents(input=texts,file_paths=file_sources)
await rag.apipeline_process_enqueue_documents() await rag.apipeline_process_enqueue_documents()
@@ -816,7 +844,7 @@ def create_document_routes(
HTTPException: If an error occurs during text processing (500). HTTPException: If an error occurs during text processing (500).
""" """
try: try:
background_tasks.add_task(pipeline_index_texts, rag, [request.text]) background_tasks.add_task(pipeline_index_texts, rag, [request.text],file_sources=[request.file_source])
return InsertResponse( return InsertResponse(
status="success", status="success",
message="Text successfully received. Processing will continue in background.", message="Text successfully received. Processing will continue in background.",
@@ -851,7 +879,7 @@ def create_document_routes(
HTTPException: If an error occurs during text processing (500). HTTPException: If an error occurs during text processing (500).
""" """
try: try:
background_tasks.add_task(pipeline_index_texts, rag, request.texts) background_tasks.add_task(pipeline_index_texts, rag, request.texts,file_sources=request.file_sources)
return InsertResponse( return InsertResponse(
status="success", status="success",
message="Text successfully received. Processing will continue in background.", message="Text successfully received. Processing will continue in background.",