Merge remote-tracking branch 'origin/main' into graph-viewer-webui
This commit is contained in:
@@ -85,7 +85,7 @@ Use the below Python snippet (in a script) to initialize LightRAG and perform qu
|
|||||||
```python
|
```python
|
||||||
import os
|
import os
|
||||||
from lightrag import LightRAG, QueryParam
|
from lightrag import LightRAG, QueryParam
|
||||||
from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete
|
from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed
|
||||||
|
|
||||||
#########
|
#########
|
||||||
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
|
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
|
||||||
@@ -95,12 +95,12 @@ from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete
|
|||||||
|
|
||||||
WORKING_DIR = "./dickens"
|
WORKING_DIR = "./dickens"
|
||||||
|
|
||||||
|
|
||||||
if not os.path.exists(WORKING_DIR):
|
if not os.path.exists(WORKING_DIR):
|
||||||
os.mkdir(WORKING_DIR)
|
os.mkdir(WORKING_DIR)
|
||||||
|
|
||||||
rag = LightRAG(
|
rag = LightRAG(
|
||||||
working_dir=WORKING_DIR,
|
working_dir=WORKING_DIR,
|
||||||
|
embedding_func=openai_embed,
|
||||||
llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model
|
llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model
|
||||||
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
||||||
)
|
)
|
||||||
|
@@ -226,3 +226,7 @@ class DocStatusStorage(BaseKVStorage):
|
|||||||
async def get_pending_docs(self) -> dict[str, DocProcessingStatus]:
|
async def get_pending_docs(self) -> dict[str, DocProcessingStatus]:
|
||||||
"""Get all pending documents"""
|
"""Get all pending documents"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
async def update_doc_status(self, data: dict[str, Any]) -> None:
|
||||||
|
"""Updates the status of a document. By default, it calls upsert."""
|
||||||
|
await self.upsert(data)
|
||||||
|
@@ -471,7 +471,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||||||
self, status: DocStatus
|
self, status: DocStatus
|
||||||
) -> Dict[str, DocProcessingStatus]:
|
) -> Dict[str, DocProcessingStatus]:
|
||||||
"""Get all documents by status"""
|
"""Get all documents by status"""
|
||||||
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$1"
|
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2"
|
||||||
params = {"workspace": self.db.workspace, "status": status}
|
params = {"workspace": self.db.workspace, "status": status}
|
||||||
result = await self.db.query(sql, params, True)
|
result = await self.db.query(sql, params, True)
|
||||||
return {
|
return {
|
||||||
@@ -505,8 +505,8 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||||||
Args:
|
Args:
|
||||||
data: Dictionary of document IDs and their status data
|
data: Dictionary of document IDs and their status data
|
||||||
"""
|
"""
|
||||||
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content_summary,content_length,chunks_count,status)
|
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status)
|
||||||
values($1,$2,$3,$4,$5,$6)
|
values($1,$2,$3,$4,$5,$6,$7)
|
||||||
on conflict(id,workspace) do update set
|
on conflict(id,workspace) do update set
|
||||||
content = EXCLUDED.content,
|
content = EXCLUDED.content,
|
||||||
content_summary = EXCLUDED.content_summary,
|
content_summary = EXCLUDED.content_summary,
|
||||||
@@ -530,6 +530,32 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||||||
)
|
)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
async def update_doc_status(self, data: dict[str, dict]) -> None:
|
||||||
|
"""
|
||||||
|
Updates only the document status, chunk count, and updated timestamp.
|
||||||
|
|
||||||
|
This method ensures that only relevant fields are updated instead of overwriting
|
||||||
|
the entire document record. If `updated_at` is not provided, the database will
|
||||||
|
automatically use the current timestamp.
|
||||||
|
"""
|
||||||
|
sql = """
|
||||||
|
UPDATE LIGHTRAG_DOC_STATUS
|
||||||
|
SET status = $3,
|
||||||
|
chunks_count = $4,
|
||||||
|
updated_at = CURRENT_TIMESTAMP
|
||||||
|
WHERE workspace = $1 AND id = $2
|
||||||
|
"""
|
||||||
|
for k, v in data.items():
|
||||||
|
_data = {
|
||||||
|
"workspace": self.db.workspace,
|
||||||
|
"id": k,
|
||||||
|
"status": v["status"].value, # Convert Enum to string
|
||||||
|
"chunks_count": v.get(
|
||||||
|
"chunks_count", -1
|
||||||
|
), # Default to -1 if not provided
|
||||||
|
}
|
||||||
|
await self.db.execute(sql, _data)
|
||||||
|
|
||||||
|
|
||||||
class PGGraphQueryException(Exception):
|
class PGGraphQueryException(Exception):
|
||||||
"""Exception for the AGE queries."""
|
"""Exception for the AGE queries."""
|
||||||
@@ -1103,6 +1129,7 @@ TABLES = {
|
|||||||
"ddl": """CREATE TABLE LIGHTRAG_DOC_STATUS (
|
"ddl": """CREATE TABLE LIGHTRAG_DOC_STATUS (
|
||||||
workspace varchar(255) NOT NULL,
|
workspace varchar(255) NOT NULL,
|
||||||
id varchar(255) NOT NULL,
|
id varchar(255) NOT NULL,
|
||||||
|
content TEXT,
|
||||||
content_summary varchar(255) NULL,
|
content_summary varchar(255) NULL,
|
||||||
content_length int4 NULL,
|
content_length int4 NULL,
|
||||||
chunks_count int4 NULL,
|
chunks_count int4 NULL,
|
||||||
|
@@ -632,7 +632,7 @@ class LightRAG:
|
|||||||
]
|
]
|
||||||
try:
|
try:
|
||||||
await asyncio.gather(*tasks)
|
await asyncio.gather(*tasks)
|
||||||
await self.doc_status.upsert(
|
await self.doc_status.update_doc_status(
|
||||||
{
|
{
|
||||||
doc_status_id: {
|
doc_status_id: {
|
||||||
"status": DocStatus.PROCESSED,
|
"status": DocStatus.PROCESSED,
|
||||||
@@ -649,7 +649,7 @@ class LightRAG:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to process document {doc_id}: {str(e)}")
|
logger.error(f"Failed to process document {doc_id}: {str(e)}")
|
||||||
await self.doc_status.upsert(
|
await self.doc_status.update_doc_status(
|
||||||
{
|
{
|
||||||
doc_status_id: {
|
doc_status_id: {
|
||||||
"status": DocStatus.FAILED,
|
"status": DocStatus.FAILED,
|
||||||
|
Reference in New Issue
Block a user