cleaned code
This commit is contained in:
@@ -443,6 +443,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||||||
return {}
|
return {}
|
||||||
else:
|
else:
|
||||||
return DocProcessingStatus(
|
return DocProcessingStatus(
|
||||||
|
content=result[0]["content"],
|
||||||
content_length=result[0]["content_length"],
|
content_length=result[0]["content_length"],
|
||||||
content_summary=result[0]["content_summary"],
|
content_summary=result[0]["content_summary"],
|
||||||
status=result[0]["status"],
|
status=result[0]["status"],
|
||||||
@@ -471,10 +472,9 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||||||
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$1"
|
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$1"
|
||||||
params = {"workspace": self.db.workspace, "status": status}
|
params = {"workspace": self.db.workspace, "status": status}
|
||||||
result = await self.db.query(sql, params, True)
|
result = await self.db.query(sql, params, True)
|
||||||
# Result is like [{'id': 'id1', 'status': 'PENDING', 'updated_at': '2023-07-01 00:00:00'}, {'id': 'id2', 'status': 'PENDING', 'updated_at': '2023-07-01 00:00:00'}, ...]
|
|
||||||
# Converting to be a dict
|
|
||||||
return {
|
return {
|
||||||
element["id"]: DocProcessingStatus(
|
element["id"]: DocProcessingStatus(
|
||||||
|
content=result[0]["content"],
|
||||||
content_summary=element["content_summary"],
|
content_summary=element["content_summary"],
|
||||||
content_length=element["content_length"],
|
content_length=element["content_length"],
|
||||||
status=element["status"],
|
status=element["status"],
|
||||||
@@ -506,6 +506,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||||||
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content_summary,content_length,chunks_count,status)
|
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content_summary,content_length,chunks_count,status)
|
||||||
values($1,$2,$3,$4,$5,$6)
|
values($1,$2,$3,$4,$5,$6)
|
||||||
on conflict(id,workspace) do update set
|
on conflict(id,workspace) do update set
|
||||||
|
content = EXCLUDED.content,
|
||||||
content_summary = EXCLUDED.content_summary,
|
content_summary = EXCLUDED.content_summary,
|
||||||
content_length = EXCLUDED.content_length,
|
content_length = EXCLUDED.content_length,
|
||||||
chunks_count = EXCLUDED.chunks_count,
|
chunks_count = EXCLUDED.chunks_count,
|
||||||
@@ -518,6 +519,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||||||
{
|
{
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.db.workspace,
|
||||||
"id": k,
|
"id": k,
|
||||||
|
"content": v["content"],
|
||||||
"content_summary": v["content_summary"],
|
"content_summary": v["content_summary"],
|
||||||
"content_length": v["content_length"],
|
"content_length": v["content_length"],
|
||||||
"chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
|
"chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
|
||||||
|
@@ -544,6 +544,7 @@ class LightRAG:
|
|||||||
return
|
return
|
||||||
|
|
||||||
to_process_docs_ids = list(to_process_docs.keys())
|
to_process_docs_ids = list(to_process_docs.keys())
|
||||||
|
|
||||||
# Get allready processed documents (text chunks and full docs)
|
# Get allready processed documents (text chunks and full docs)
|
||||||
text_chunks_processed_doc_ids = await self.text_chunks.filter_keys(
|
text_chunks_processed_doc_ids = await self.text_chunks.filter_keys(
|
||||||
to_process_docs_ids
|
to_process_docs_ids
|
||||||
@@ -570,9 +571,8 @@ class LightRAG:
|
|||||||
ids_doc_processing_status,
|
ids_doc_processing_status,
|
||||||
desc=f"Process Batch {batch_idx}",
|
desc=f"Process Batch {batch_idx}",
|
||||||
):
|
):
|
||||||
# Update status in processing
|
|
||||||
id_doc, status_doc = id_doc_processing_status
|
id_doc, status_doc = id_doc_processing_status
|
||||||
|
# Update status in processing
|
||||||
await self.doc_status.upsert(
|
await self.doc_status.upsert(
|
||||||
{
|
{
|
||||||
id_doc: {
|
id_doc: {
|
||||||
@@ -601,8 +601,8 @@ class LightRAG:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Ensure chunk insertion and graph processing happen sequentially, not in parallel
|
# Ensure chunk insertion and graph processing happen sequentially, not in parallel
|
||||||
await self._process_entity_relation_graph(chunks)
|
|
||||||
await self.chunks_vdb.upsert(chunks)
|
await self.chunks_vdb.upsert(chunks)
|
||||||
|
await self._process_entity_relation_graph(chunks)
|
||||||
|
|
||||||
tasks[id_doc] = []
|
tasks[id_doc] = []
|
||||||
# Check if document already processed the doc
|
# Check if document already processed the doc
|
||||||
|
Reference in New Issue
Block a user