From fba232487b94c568e52504e4a9a5da14d0448d9f Mon Sep 17 00:00:00 2001
From: Larfii <834462287@qq.com>
Date: Mon, 7 Oct 2024 18:48:45 +0800
Subject: [PATCH] update

---
 examples/batch_eval.py                        | 112 ++++++++++++++++++
 examples/insert.py                            |  19 +++
 examples/query.py                             |  17 +++
 lightrag/__pycache__/lightrag.cpython-310.pyc | Bin 7680 -> 7599 bytes
 lightrag/__pycache__/llm.cpython-310.pyc      | Bin 2973 -> 2571 bytes
 lightrag/llm.py                               |   9 +-
 6 files changed, 152 insertions(+), 5 deletions(-)
 create mode 100644 examples/batch_eval.py
 create mode 100644 examples/insert.py
 create mode 100644 examples/query.py

diff --git a/examples/batch_eval.py b/examples/batch_eval.py
new file mode 100644
index 00000000..753ecb7d
--- /dev/null
+++ b/examples/batch_eval.py
@@ -0,0 +1,112 @@
+import os
+import re
+import json
+import jsonlines
+
+from openai import OpenAI
+
+
+def batch_eval(query_file, result1_file, result2_file, output_file_path, api_key):
+    client = OpenAI(api_key=api_key)
+
+    with open(query_file, 'r') as f:
+        data = f.read()
+
+    queries = re.findall(r'- Question \d+: (.+)', data)
+
+    with open(result1_file, 'r') as f:
+        answers1 = json.load(f)
+    answers1 = [i['result'] for i in answers1]
+
+    with open(result2_file, 'r') as f:
+        answers2 = json.load(f)
+    answers2 = [i['result'] for i in answers2]
+
+    requests = []
+    for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
+        sys_prompt = f"""
+        ---Role---
+        You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+        """
+
+        prompt = f"""
+        You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+
+        - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
+        - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
+        - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
+
+        For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
+
+        Here is the question:
+        {query}
+
+        Here are the two answers:
+
+        **Answer 1:**
+        {answer1}
+
+        **Answer 2:**
+        {answer2}
+
+        Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
+
+        Output your evaluation in the following JSON format:
+
+        {{
+            "Comprehensiveness": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Empowerment": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Provide explanation here]"
+            }},
+            "Overall Winner": {{
+                "Winner": "[Answer 1 or Answer 2]",
+                "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
+            }}
+        }}
+        """
+
+
+        request_data = {
+            "custom_id": f"request-{i+1}",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": "gpt-4o-mini",
+                "messages": [
+                    {"role": "system", "content": sys_prompt},
+                    {"role": "user", "content": prompt}
+                ],
+            }
+        }
+
+        requests.append(request_data)
+
+    with jsonlines.open(output_file_path, mode='w') as writer:
+        for request in requests:
+            writer.write(request)
+
+    print(f"Batch API requests written to {output_file_path}")
+
+    batch_input_file = client.files.create(
+        file=open(output_file_path, "rb"),
+        purpose="batch"
+    )
+    batch_input_file_id = batch_input_file.id
+
+    batch = client.batches.create(
+        input_file_id=batch_input_file_id,
+        endpoint="/v1/chat/completions",
+        completion_window="24h",
+        metadata={
+            "description": "nightly eval job"
+        }
+    )
+
+    print(f'Batch {batch.id} has been created.')
+
+if __name__ == "__main__":
+    batch_eval()
\ No newline at end of file
diff --git a/examples/insert.py b/examples/insert.py
new file mode 100644
index 00000000..95a3e150
--- /dev/null
+++ b/examples/insert.py
@@ -0,0 +1,19 @@
+import os
+import sys
+sys.path.append('xxx/xxx/LightRAG')
+
+from lightrag import LightRAG
+
+os.environ["OPENAI_API_KEY"] = ""
+
+WORKING_DIR = ""
+
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+rag = LightRAG(working_dir=WORKING_DIR)
+
+with open('./text.txt', 'r') as f:
+    text = f.read()
+
+rag.insert(text)
\ No newline at end of file
diff --git a/examples/query.py b/examples/query.py
new file mode 100644
index 00000000..60bf9346
--- /dev/null
+++ b/examples/query.py
@@ -0,0 +1,17 @@
+import os
+import sys
+sys.path.append('xxx/xxx/LightRAG')
+
+from lightrag import LightRAG, QueryParam
+
+os.environ["OPENAI_API_KEY"] = ""
+
+WORKING_DIR = ""
+
+rag = LightRAG(working_dir=WORKING_DIR)
+
+mode = 'global'
+query_param = QueryParam(mode=mode)
+
+result, _ = rag.query("", param=query_param)
+print(result)
\ No newline at end of file
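Usage note (not part of the patch): the script's entry point calls batch_eval() without arguments, so in practice the five parameters have to be supplied by the caller. A minimal invocation sketch follows; the file names are hypothetical placeholders, and it assumes the script is importable (e.g. run from the examples/ directory) with OPENAI_API_KEY exported in the environment.

import os
from batch_eval import batch_eval  # assumption: examples/batch_eval.py is on the import path

batch_eval(
    query_file="queries.txt",                 # hypothetical: text file with "- Question N: ..." lines
    result1_file="result1.json",              # hypothetical: JSON list of {"result": ...} answers
    result2_file="result2.json",              # hypothetical: the second system's answers, same format
    output_file_path="batch_requests.jsonl",  # Batch API request lines are written here
    api_key=os.environ["OPENAI_API_KEY"],     # assumes the key is set in the environment
)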
diff --git a/lightrag/__pycache__/lightrag.cpython-310.pyc b/lightrag/__pycache__/lightrag.cpython-310.pyc
index 91010cbb9d6f3592b9c2a8747007af1de75c360b..c378b67e9161b2a876213d7e81f73ea5135ecb81 100644
GIT binary patch
[binary delta omitted: lightrag.cpython-310.pyc, 7680 -> 7599 bytes]
diff --git a/lightrag/__pycache__/llm.cpython-310.pyc b/lightrag/__pycache__/llm.cpython-310.pyc
index 03af12fa3c15e17cfb6dc7bd5696b457cf3da825..63b3832ea9396f1c39889a6bd9bd40b5a0764b07 100644
GIT binary patch
[binary delta omitted: llm.cpython-310.pyc, 2973 -> 2571 bytes]
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 4b4f7e94..ee700a10 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -17,10 +17,9 @@ from .utils import compute_args_hash, wrap_embedding_func_with_attrs
     retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
 )
 async def openai_complete_if_cache(
-    model, prompt, api_key=''
-, system_prompt=None, history_messages=[], **kwargs
+    model, prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
-    openai_async_client = AsyncOpenAI(api_key=api_key)
+    openai_async_client = AsyncOpenAI()
     hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
     messages = []
     if system_prompt:
@@ -72,8 +71,8 @@ async def gpt_4o_mini_complete(
     wait=wait_exponential(multiplier=1, min=4, max=10),
     retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
 )
-async def openai_embedding(texts: list[str], api_key='') -> np.ndarray:
-    openai_async_client = AsyncOpenAI(api_key=api_key)
+async def openai_embedding(texts: list[str]) -> np.ndarray:
+    openai_async_client = AsyncOpenAI()
     response = await openai_async_client.embeddings.create(
         model="text-embedding-3-small", input=texts, encoding_format="float"
     )
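With this change, openai_complete_if_cache and openai_embedding no longer accept an api_key argument; AsyncOpenAI() picks the key up from the OPENAI_API_KEY environment variable instead, which is why the new example scripts set os.environ["OPENAI_API_KEY"]. A minimal calling sketch (not part of the patch): it assumes the functions can be used standalone, since hashing_kv defaults to None, that OPENAI_API_KEY is exported in the shell, and that asyncio is an acceptable way to drive the async API; the prompts and texts are illustrative.

import asyncio

from lightrag.llm import gpt_4o_mini_complete, openai_embedding

async def main():
    # gpt_4o_mini_complete routes through openai_complete_if_cache with model="gpt-4o-mini";
    # the key now comes from the OPENAI_API_KEY environment variable.
    answer = await gpt_4o_mini_complete("What is retrieval-augmented generation?")
    print(answer)

    # openai_embedding returns a NumPy array with one embedding row per input text.
    vectors = await openai_embedding(["hello", "world"])
    print(vectors.shape)

asyncio.run(main())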