diff --git a/README.md b/README.md index eef2ae53..7670e3da 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ pip install lightrag-hku ## Quick Start -* Set OpenAI API key in environment: `export OPENAI_API_KEY="sk-...".` +* Set OpenAI API key in environment: `export OPENAI_API_KEY="sk-..."`. * Download the demo text "A Christmas Carol by Charles Dickens" ``` curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt @@ -30,7 +30,12 @@ Use the below python snippet: ``` from lightrag import LightRAG, QueryParam -rag = LightRAG(working_dir="./dickens") +WORKING_DIR = "./dickens" + +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + +rag = LightRAG(working_dir=WORKING_DIR) with open("./book.txt") as f: rag.insert(f.read()) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index f11e868b..836fda9e 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -166,7 +166,7 @@ class LightRAG: try: if isinstance(string_or_strings, str): string_or_strings = [string_or_strings] - # ---------- new docs + new_docs = { compute_mdhash_id(c.strip(), prefix="doc-"): {"content": c.strip()} for c in string_or_strings @@ -178,7 +178,6 @@ class LightRAG: return logger.info(f"[New Docs] inserting {len(new_docs)} docs") - # ---------- chunking inserting_chunks = {} for doc_key, doc in new_docs.items(): chunks = { @@ -207,7 +206,19 @@ class LightRAG: await self.chunks_vdb.upsert(inserting_chunks) - # ---------- commit upsertings and indexing + logger.info("[Entity Extraction]...") + maybe_new_kg = await extract_entities( + inserting_chunks, + knwoledge_graph_inst=self.chunk_entity_relation_graph, + entity_vdb=self.entities_vdb, + relationships_vdb=self.relationships_vdb, + global_config=asdict(self), + ) + if maybe_new_kg is None: + logger.warning("No new entities and relationships found") + return + self.chunk_entity_relation_graph = maybe_new_kg + await self.full_docs.upsert(new_docs) await self.text_chunks.upsert(inserting_chunks) finally: diff --git a/lightrag/operate.py b/lightrag/operate.py index a8564f0d..2d3271da 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -940,5 +940,5 @@ async def naive_query( query, system_prompt=sys_prompt, ) - return (response, section) + return response