fix demo
@@ -1,5 +1,9 @@
 import os
+import asyncio
+import nest_asyncio
+
+nest_asyncio.apply()
 
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import (
     openai_complete_if_cache,
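The four lines added above are what let the reworked demo call asyncio.run() from inside code that is already running under an event loop, which plain asyncio forbids. A minimal standalone sketch of the effect (not part of the demo file):

import asyncio
import nest_asyncio

# Patch asyncio so nested event loops are allowed.
nest_asyncio.apply()

async def inner():
    return "ok"

async def outer():
    # Without nest_asyncio.apply(), this nested call raises
    # "RuntimeError: asyncio.run() cannot be called from a running event loop".
    return asyncio.run(inner())

print(asyncio.run(outer()))  # -> ok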
@@ -7,10 +11,12 @@ from lightrag.llm import (
 )
 from lightrag.utils import EmbeddingFunc
 import numpy as np
+from lightrag.kg.shared_storage import initialize_pipeline_status
+
 # for custom llm_model_func
 from lightrag.utils import locate_json_string_body_from_string
 
 
 WORKING_DIR = "./dickens"
 
 if not os.path.exists(WORKING_DIR):
@@ -91,42 +97,37 @@ async def test_funcs():
 
 # asyncio.run(test_funcs())
 
+async def initialize_rag():
+    embedding_dimension = await get_embedding_dim()
+    print(f"Detected embedding dimension: {embedding_dimension}")
+
+    # LightRAG instance used during indexing
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        # llm_model_name="meta/llama3-70b-instruct",  # uncomment if needed
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dimension,
+            max_token_size=512,  # maximum token size; somehow inputs still exceed the token limit,
+            # so the truncate (trunc) parameter on embedding_func handles it; examine the tokenizer used in LightRAG
+            # so you can adjust it to fit the NVIDIA model (future work)
+            func=indexing_embedding_func,
+        ),
+    )
+
+    await rag.initialize_storages()
+    await initialize_pipeline_status()
+
+    return rag
 async def main():
     try:
-        embedding_dimension = await get_embedding_dim()
-        print(f"Detected embedding dimension: {embedding_dimension}")
-
-        # LightRAG instance used during indexing
-        rag = LightRAG(
-            working_dir=WORKING_DIR,
-            llm_model_func=llm_model_func,
-            # llm_model_name="meta/llama3-70b-instruct",  # uncomment if needed
-            embedding_func=EmbeddingFunc(
-                embedding_dim=embedding_dimension,
-                max_token_size=512,  # maximum token size; somehow inputs still exceed the token limit,
-                # so the truncate (trunc) parameter on embedding_func handles it; examine the tokenizer used in LightRAG
-                # so you can adjust it to fit the NVIDIA model (future work)
-                func=indexing_embedding_func,
-            ),
-        )
+        # Initialize RAG instance
+        rag = asyncio.run(initialize_rag())
 
         # reading file
         with open("./book.txt", "r", encoding="utf-8") as f:
             await rag.ainsert(f.read())
 
         # redefine rag to switch the embedding function to query type
         rag = LightRAG(
             working_dir=WORKING_DIR,
             llm_model_func=llm_model_func,
             # llm_model_name="meta/llama3-70b-instruct",  # uncomment if needed
             embedding_func=EmbeddingFunc(
                 embedding_dim=embedding_dimension,
                 max_token_size=512,
                 func=query_embedding_func,
             ),
         )
 
         # Perform naive search
         print("==============Naive===============")
         print(
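Taken together, the commit replaces the inline LightRAG construction in main() with an initialize_rag() helper that also runs the storage and pipeline-status initialization before the first insert, which appears to be the point of this fix. A condensed sketch of the resulting flow, assuming the helpers from the full demo file (get_embedding_dim, llm_model_func, indexing_embedding_func) are defined as before:

async def initialize_rag():
    embedding_dimension = await get_embedding_dim()
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=embedding_dimension,
            max_token_size=512,
            func=indexing_embedding_func,
        ),
    )
    # Storages and pipeline status are initialized here, before any
    # document is inserted.
    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag

async def main():
    # Nested asyncio.run() works because nest_asyncio.apply() ran at import time.
    rag = asyncio.run(initialize_rag())
    with open("./book.txt", "r", encoding="utf-8") as f:
        await rag.ainsert(f.read())

After indexing, the demo rebuilds LightRAG with query_embedding_func: the script's comments indicate the NVIDIA embedding model treats indexing-type and query-type inputs differently, so the two phases use separately configured embedding functions.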