chore: added pre-commit-hooks and ruff formatting for commit-hooks

This commit is contained in:
Sanketh Kumar
2024-10-19 09:43:17 +05:30
parent 99bd644bf7
commit 32464fab4e
26 changed files with 635 additions and 393 deletions

View File

@@ -1,8 +1,8 @@
import os
import json
from openai import OpenAI
from transformers import GPT2Tokenizer
def openai_complete_if_cache(
model="gpt-4o", prompt=None, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -19,24 +19,26 @@ def openai_complete_if_cache(
)
return response.choices[0].message.content
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
def get_summary(context, tot_tokens=2000):
    """Build a shortened version of *context* from two token windows.

    The text is tokenized with the module-level ``tokenizer``. One window of
    ``tot_tokens // 2`` tokens is taken starting 1000 tokens after the
    beginning, and a second window of the same size is taken ending 1000
    tokens before the end. The two windows are concatenated and converted
    back to a string.

    Args:
        context: Text to summarize; passed straight to ``tokenizer.tokenize``.
        tot_tokens: Target number of tokens in the summary, split evenly
            between the start window and the end window.

    Returns:
        The string produced by ``tokenizer.convert_tokens_to_string`` for the
        selected tokens. For inputs shorter than roughly
        ``2 * 1000 + tot_tokens`` tokens the two windows may overlap or be
        shorter than requested, because slicing clamps to the available range.
    """
    # Number of tokens skipped at each end of the text before a window starts.
    edge_offset = 1000

    tokens = tokenizer.tokenize(context)
    half_tokens = tot_tokens // 2

    start_tokens = tokens[edge_offset : edge_offset + half_tokens]
    # The stop index must be negative as well: the previous positive stop
    # (1000) made this slice empty for long inputs, so the tail of the text
    # never reached the summary.
    end_tokens = tokens[-(edge_offset + half_tokens) : -edge_offset]

    summary_tokens = start_tokens + end_tokens
    return tokenizer.convert_tokens_to_string(summary_tokens)
clses = ['agriculture']
clses = ["agriculture"]
for cls in clses:
with open(f'../datasets/unique_contexts/{cls}_unique_contexts.json', mode='r') as f:
with open(f"../datasets/unique_contexts/{cls}_unique_contexts.json", mode="r") as f:
unique_contexts = json.load(f)
summaries = [get_summary(context) for context in unique_contexts]
@@ -67,10 +69,10 @@ for cls in clses:
...
"""
result = openai_complete_if_cache(model='gpt-4o', prompt=prompt)
result = openai_complete_if_cache(model="gpt-4o", prompt=prompt)
file_path = f"../datasets/questions/{cls}_questions.txt"
with open(file_path, "w") as file:
file.write(result)
print(f"{cls}_questions written to {file_path}")
print(f"{cls}_questions written to {file_path}")