diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index aa5af7e5..955e8753 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -84,22 +84,33 @@ class InsertTextRequest(BaseModel): Attributes: text: The text content to be inserted into the RAG system + file_source: Source of the text (optional) """ text: str = Field( min_length=1, description="The text to insert", ) - + file_source: str = Field( + default=None, + min_length=0, + description="File Source" + ) @field_validator("text", mode="after") @classmethod def strip_after(cls, text: str) -> str: return text.strip() - + + @field_validator("file_source", mode="after") + @classmethod + def strip_after(cls, file_source: str) -> str: + return file_source.strip() + class Config: json_schema_extra = { "example": { - "text": "This is a sample text to be inserted into the RAG system." + "text": "This is a sample text to be inserted into the RAG system.", + "file_source": "Source of the text (optional)" } } @@ -109,24 +120,37 @@ class InsertTextsRequest(BaseModel): Attributes: texts: List of text contents to be inserted into the RAG system + file_sources: Sources of the texts (optional) """ texts: list[str] = Field( min_length=1, description="The texts to insert", ) - + file_sources: list[str] = Field( + default=None, + min_length=0, + description="Sources of the texts" + ) + @field_validator("texts", mode="after") @classmethod def strip_after(cls, texts: list[str]) -> list[str]: return [text.strip() for text in texts] - + @field_validator("file_sources", mode="after") + @classmethod + def strip_after(cls, file_sources: list[str]) -> list[str]: + return [file_source.strip() for file_source in file_sources] + class Config: json_schema_extra = { "example": { "texts": [ "This is the first text to be inserted.", "This is the second text to be inserted.", + ], + "file_sources": [ + "First file source (optional)", ] } } @@ -656,16 +680,20 @@ async def pipeline_index_files(rag: LightRAG, file_paths: List[Path]): logger.error(traceback.format_exc()) -async def pipeline_index_texts(rag: LightRAG, texts: List[str]): +async def pipeline_index_texts(rag: LightRAG, texts: List[str],file_sources: List[str]=None): """Index a list of texts Args: rag: LightRAG instance texts: The texts to index + file_sources: Sources of the texts """ if not texts: return - await rag.apipeline_enqueue_documents(texts) + if file_sources is not None: + if len(file_sources) != 0 and len(file_sources) != len(texts): + [file_sources.append("unknown_source") for _ in range(len(file_sources),len(texts))] + await rag.apipeline_enqueue_documents(input=texts,file_paths=file_sources) await rag.apipeline_process_enqueue_documents() @@ -816,7 +844,7 @@ def create_document_routes( HTTPException: If an error occurs during text processing (500). """ try: - background_tasks.add_task(pipeline_index_texts, rag, [request.text]) + background_tasks.add_task(pipeline_index_texts, rag, [request.text],file_sources=[request.file_source]) return InsertResponse( status="success", message="Text successfully received. Processing will continue in background.", @@ -851,7 +879,7 @@ def create_document_routes( HTTPException: If an error occurs during text processing (500). """ try: - background_tasks.add_task(pipeline_index_texts, rag, request.texts) + background_tasks.add_task(pipeline_index_texts, rag, request.texts,file_sources=request.file_sources) return InsertResponse( status="success", message="Text successfully received. Processing will continue in background.",