{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4b5690db12e34685",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T05:38:34.174205Z",
     "start_time": "2025-01-07T05:38:29.978194Z"
    }
   },
   "outputs": [],
   "source": [
    "import getpass\n",
    "import logging\n",
    "import os\n",
    "\n",
    "import nest_asyncio\n",
    "import numpy as np\n",
    "from lightrag import LightRAG, QueryParam\n",
    "from lightrag.llm import openai_complete_if_cache, openai_embedding\n",
    "from lightrag.utils import EmbeddingFunc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8c8ee7c061bf9159",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T05:38:37.440083Z",
     "start_time": "2025-01-07T05:38:37.437666Z"
    }
   },
   "outputs": [],
   "source": [
    "# Patch asyncio to allow nested event loops; Jupyter already runs one.\n",
    "nest_asyncio.apply()\n",
    "\n",
    "WORKING_DIR = \"../llm_rag/paper_db/R000088_test2\"\n",
    "# Endpoint settings shared by the LLM and embedding wrappers.\n",
    "ARK_BASE_URL = \"https://ark.cn-beijing.volces.com/api/v3\"\n",
    "LLM_ENDPOINT_ID = \"ep-20241218114828-2tlww\"\n",
    "EMBEDDING_ENDPOINT_ID = \"ep-20241231173413-pgjmk\"\n",
    "\n",
    "logging.basicConfig(format=\"%(levelname)s:%(message)s\", level=logging.INFO)\n",
    "\n",
    "# makedirs rather than mkdir: WORKING_DIR is nested, so parents may be missing too.\n",
    "os.makedirs(WORKING_DIR, exist_ok=True)\n",
    "\n",
    "# Keep the API key out of the notebook: take it from the environment and\n",
    "# prompt (without echo) only when it has not been set.\n",
    "if not os.getenv(\"doubao_api\"):\n",
    "    os.environ[\"doubao_api\"] = getpass.getpass(\"doubao_api key: \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a5009d16e0851dca",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T05:38:42.594315Z",
     "start_time": "2025-01-07T05:38:42.590800Z"
    }
   },
   "outputs": [],
   "source": [
    "async def llm_model_func(\n",
    "    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs\n",
    ") -> str:\n",
    "    \"\"\"Request a completion from the configured LLM endpoint.\n",
    "\n",
    "    Args:\n",
    "        prompt: Prompt passed to ``openai_complete_if_cache``.\n",
    "        system_prompt: Optional system prompt.\n",
    "        history_messages: Optional prior messages; ``None`` means no history.\n",
    "        keyword_extraction: Accepted for caller compatibility; not forwarded.\n",
    "        **kwargs: Forwarded unchanged to ``openai_complete_if_cache``.\n",
    "\n",
    "    Returns:\n",
    "        Whatever ``openai_complete_if_cache`` returns for this request.\n",
    "    \"\"\"\n",
    "    return await openai_complete_if_cache(\n",
    "        LLM_ENDPOINT_ID,\n",
    "        prompt,\n",
    "        system_prompt=system_prompt,\n",
    "        # Build a fresh list per call instead of sharing one mutable default.\n",
    "        history_messages=[] if history_messages is None else history_messages,\n",
    "        api_key=os.getenv(\"doubao_api\"),\n",
    "        base_url=ARK_BASE_URL,\n",
    "        **kwargs,\n",
    "    )\n",
    "\n",
    "\n",
    "async def embedding_func(texts: list[str]) -> np.ndarray:\n",
    "    \"\"\"Embed ``texts`` through the configured embedding endpoint.\"\"\"\n",
    "    return await openai_embedding(\n",
    "        texts,\n",
    "        model=EMBEDDING_ENDPOINT_ID,\n",
    "        api_key=os.getenv(\"doubao_api\"),\n",
    "        base_url=ARK_BASE_URL,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
"id": "397fcad24ce4d0ed", "metadata": { "ExecuteTime": { "end_time": "2025-01-07T05:38:44.016901Z", "start_time": "2025-01-07T05:38:44.006291Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:lightrag:Logger initialized for working directory: ../llm_rag/paper_db/R000088_test2\n", "INFO:lightrag:Load KV llm_response_cache with 0 data\n", "INFO:lightrag:Load KV full_docs with 0 data\n", "INFO:lightrag:Load KV text_chunks with 0 data\n", "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../llm_rag/paper_db/R000088_test2/vdb_entities.json'} 0 data\n", "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../llm_rag/paper_db/R000088_test2/vdb_relationships.json'} 0 data\n", "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../llm_rag/paper_db/R000088_test2/vdb_chunks.json'} 0 data\n", "INFO:lightrag:Loaded document status storage with 0 records\n" ] } ], "source": [ "rag = LightRAG(\n", " working_dir=WORKING_DIR,\n", " llm_model_func=llm_model_func,\n", " embedding_func=EmbeddingFunc(\n", " embedding_dim=4096, max_token_size=8192, func=embedding_func\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "id": "1dc3603677f7484d", "metadata": { "ExecuteTime": { "end_time": "2025-01-07T05:38:47.509111Z", "start_time": "2025-01-07T05:38:47.501997Z" } }, "outputs": [], "source": [ "with open(\n", " \"../llm_rag/example/R000088/auto/R000088_full_txt.md\", \"r\", encoding=\"utf-8\"\n", ") as f:\n", " content = f.read()\n", "\n", "\n", "# Probe the embedding endpoint with one sentence to read its output dimension.\n", "# Uses the embedding_func defined in the earlier model-wrapper cell.\n", "async def get_embedding_dim():\n", " test_text = [\"This is a test sentence.\"]\n", " embedding = await 
embedding_func(test_text)\n", " embedding_dim = embedding.shape[1]\n", " return embedding_dim" ] }, { "cell_type": "code", "execution_count": 6, "id": "6844202606acfbe5", "metadata": { "ExecuteTime": { "end_time": "2025-01-07T05:38:50.666764Z", "start_time": "2025-01-07T05:38:50.247712Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n" ] } ], "source": [ "embedding_dimension = await get_embedding_dim()" ] }, { "cell_type": "code", "execution_count": 7, "id": "d6273839d9681403", "metadata": { "ExecuteTime": { "end_time": "2025-01-07T05:42:33.085507Z", "start_time": "2025-01-07T05:38:56.789348Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:lightrag:Processing 1 new unique documents\n", "Processing batch 1: 0%| | 0/1 [00:00标签中,针对每个问题详细分析你的思考过程。然后在<回答>标签中给出所有问题的最终答案。\"\"\"" ] }, { "cell_type": "code", "execution_count": 9, "id": "7a6491385b050095", "metadata": { "ExecuteTime": { "end_time": "2025-01-07T05:43:24.751628Z", "start_time": "2025-01-07T05:42:50.865679Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n", "INFO:lightrag:kw_prompt result:\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"high_level_keywords\": [\"英文学术研究论文分析\", \"关键信息提取\", \"深入分析\"],\n", " \"low_level_keywords\": [\"研究队列\", \"队列名称\", \"队列开展国家\", \"性别分布\", \"年龄分布\", \"队列研究时间线\", \"实际参与研究人数\"]\n", "}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n", "INFO:lightrag:Local query uses 60 entites, 38 relations, 6 text units\n", "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n", "INFO:lightrag:Global query uses 72 
entites, 60 relations, 4 text units\n", "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "<分析>\n", "- **分析对象来自哪些研究队列及是单独分析还是联合分析**:\n", " 通过查找论文内容,发现文中提到“This is a combined analysis of data from 2 randomized, double-blind, placebo-controlled clinical trials (Norwegian Vitamin [NORVIT] trial15 and Western Norway B Vitamin Intervention Trial [WENBIT]16)”,明确是对两个队列的数据进行联合分析,队列名称分别为“Norwegian Vitamin (NORVIT) trial”和“Western Norway B Vitamin Intervention Trial (WENBIT)”。\n", "- **队列开展的国家**:\n", " 文中多次提及研究在挪威进行,如“combined analyses and extended follow-up of 2 vitamin B intervention trials among patients with ischemic heart disease in Norway”,所以确定研究开展的国家是挪威。\n", "- **队列研究对象的性别分布**:\n", " 从“Mean (SD) age was 62.3 (11.0) years and 23.5% of participants were women”可知,研究对象包含男性和女性,即全体。\n", "- **队列收集结束时研究对象年龄分布**:\n", " 已知“Mean (SD) age was 62.3 (11.0) years”是基线时年龄信息,“Median (interquartile range) duration of extended follow-up through December 31, 2007, was 78 (61 - 90) months”,由于随访的中位时间是78个月(约6.5年),所以可推算队列收集结束时研究对象年龄均值约为62.3 + 6.5 = 68.8岁(标准差仍为11.0年)。\n", "- **队列研究时间线**:\n", " 根据“2 randomized, double-blind, placebo-controlled clinical trials (Norwegian Vitamin [NORVIT] trial15 and Western Norway B Vitamin Intervention Trial [WENBIT]16) conducted between 1998 and 2005, and an observational posttrial follow-up through December 31, 2007”可知,队列开始收集信息时间为1998年,结束时间为2007年12月31日。\n", "- **队列结束时实际参与研究人数**:\n", " 由“A total of 6837 individuals were included in the combined analyses, of whom 6261 (91.6%) participated in posttrial follow-up”可知,队列结束时实际参与研究人数为6261人。\n", "\n", "\n", "<回答>\n", "- 分析对象来自“Norwegian Vitamin (NORVIT) trial”和“Western Norway B Vitamin Intervention Trial (WENBIT)”两个研究队列,文中是对这两个队列的数据进行联合分析。\n", "- 队列开展的国家是挪威。\n", "- 队列研究对象的性别分布为全体。\n", "- 队列收集结束时,研究对象年龄分布均值约为68.8岁,标准差为11.0年。\n", "- 队列研究时间线为1998年开始收集信息/建立队列,2007年12月31日结束。\n", "- 
队列结束时实际参与研究人数是6261人。\n" ] } ], "source": [ "print(rag.query(prompt1, param=QueryParam(mode=\"hybrid\")))" ] }, { "cell_type": "code", "execution_count": null, "id": "fef9d06983da47af", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 5 }