FastAPI Framework Setup Modification #9

Open
wants to merge 3 commits into base: main
102 changes: 102 additions & 0 deletions Pippy/router.py
@@ -0,0 +1,102 @@
from fastapi import APIRouter
from pydantic import BaseModel
import os
import warnings
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain.chains import RetrievalQA
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

# Suppress warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Define document paths
document_paths = [
'/home/kshitij/Downloads/AI-model/Pygame Documentation.pdf',
'/home/kshitij/Downloads/AI-model/AI-model(Streamlitfree)/Python GTK+3 Documentation.pdf',
]

# Define the Pydantic model for input
class Question(BaseModel):
    query: str

router = APIRouter()

# Helper function to set up the vector store
def setup_vectorstore(file_paths):
    try:
        all_documents = []
        for file_path in file_paths:
            if os.path.exists(file_path):
                print(f"Loading document from: {file_path}")
                if file_path.endswith(".pdf"):
                    loader = PyMuPDFLoader(file_path)
                else:
                    loader = TextLoader(file_path)

                documents = loader.load()
                print(f"Loaded {len(documents)} documents from {file_path}.")
                all_documents.extend(documents)
            else:
                print(f"File not found: {file_path}")

        embeddings = HuggingFaceEmbeddings()
        vector_store = FAISS.from_documents(all_documents, embeddings)
        return vector_store.as_retriever()

    except Exception as e:
        print(f"Failed to set up the retriever: {e}")
        return None

# System prompt definition
system_prompt = """
You are a highly intelligent Python coding assistant with access to both general knowledge and specific Pygame documentation.
1. Only answer Python and GTK-based coding queries.
2. Prioritize answers based on the documentation when the query is related to it. However, make sure you are not biased towards the documentation provided to you.
3. Do not mention words like "context" or "documentation" when referring to what has been provided to you.
4. Provide step-by-step explanations wherever applicable.
5. If the documentation does not contain relevant information, use your general knowledge.
6. Always be clear, concise, and provide examples where necessary.
"""

template = f"""{system_prompt}
Question: {{question}}
Answer: Let's think step by step.
"""
prompt = ChatPromptTemplate.from_template(template)
model = OllamaLLM(model="llama3.1")

retriever = setup_vectorstore(document_paths)

if retriever:
    rag_chain = RetrievalQA.from_chain_type(llm=model, chain_type="stuff", retriever=retriever)
else:
    raise RuntimeError("Unable to initialize retriever. Check document paths.")

@router.post("/generate_answer")
def generate_answer(question: Question):
    try:
        # Retrieve relevant documents
        results = retriever.get_relevant_documents(question.query)
        if results:
            print("Relevant document found. Using document-specific response...")
            response = rag_chain({"query": question.query})
            return {
                "success": True,
                "response": response.get("result", "No result found.")
            }
        else:
            print("No relevant document found. Using general knowledge response...")
            response = model.invoke(question.query)
            return {
                "success": True,
                "response": response
            }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
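For reference, a minimal sketch of how a client could call this endpoint. The /sugar-ai/pippy prefix and port 8000 are assumptions, since this PR does not actually mount the Pippy router in main.py; the response shape follows the dictionaries returned above.

# Sketch of a client request to the Pippy route; prefix and port are assumptions.
import requests

resp = requests.post(
    "http://localhost:8000/sugar-ai/pippy/generate_answer",
    json={"query": "How do I draw a circle in Pygame?"},
)
data = resp.json()
if data["success"]:
    print(data["response"])
else:
    print("Error:", data["error"])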
125 changes: 125 additions & 0 deletions chat/router.py
@@ -0,0 +1,125 @@
from fastapi import APIRouter
from pydantic import BaseModel

from unsloth import FastLanguageModel
import torch
import re

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

alpaca_prompt = """Below is an instruction that describes a task, along with an input that provides additional context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

class Question(BaseModel):
    query: str

router = APIRouter()

@router.post("/generate_answer")
def generate_answer(value: Question):
Member:
This looks like an extended version of generate_bot_response in original_main.py below; if that's the case, then you should delete that one, as it was based on your draft PR to the chat activity.
Also, does the prompt you've used here provide better responses than the one you used earlier?

Author:
For the first question: this is not an extended version of generate_bot_response from original_main.py. This version utilizes the Unsloth library within the Chat activity and is significantly different from original_main.py. Additionally, we plan to remove the original_main.py file in the final modifications.
For the second question: yes, the new prompt encourages the model to generate better responses.

Member:
Yes, it's significantly different from original_main.py, but it achieves the same goal, so generate_bot_response from original_main.py needs to be deleted to avoid duplication.

Author:
Oh, I agree! I will delete that!

Member:
Thank you! Can you also start working on Kshitij's part? I think you can work with what he has so far.

Author:
Ok, I will complete it after my final exam.

    try:
        # Load the llama model and tokenizer from the pretrained model
        llama_model, llama_tokenizer = FastLanguageModel.from_pretrained(
            model_name="Antonio27/llama3-8b-4-bit-for-sugar",
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
        )

        # Load the gemma model and tokenizer from the pretrained model
        gemma_model, gemma_tokenizer = FastLanguageModel.from_pretrained(
            model_name="unsloth/gemma-2-9b-it-bnb-4bit",
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
        )

        # Prepare llama model for inference
        FastLanguageModel.for_inference(llama_model)
        llama_tokenizer.pad_token = llama_tokenizer.eos_token
        llama_tokenizer.add_eos_token = True

        # Tokenize the input question for the llama model
        inputs = llama_tokenizer(
            [
                alpaca_prompt.format(
                    f'''
                    Your task is to answer children's questions using simple language.
                    Explain any difficult words in a way a 3-year-old can understand.
                    Keep responses under 60 words.
                    \n\nQuestion: {value.query}
                    ''',  # instruction
                    "",  # input
                    "",  # output - leave this blank for generation!
                )
            ], return_tensors="pt").to("cuda")

        # Generate output using the llama model
        outputs = llama_model.generate(**inputs, max_new_tokens=256, temperature=0.6)
        decoded_outputs = llama_tokenizer.batch_decode(outputs)

        # Extract the response text
        response_text = decoded_outputs[0]

        # Use regex to find the response section in the output
        match = re.search(r"### Response:(.*?)(?=\n###|$)", response_text, re.DOTALL)
        if match:
            initial_response = match.group(1).strip()
        else:
            initial_response = ""

        # Prepare gemma model for inference
        FastLanguageModel.for_inference(gemma_model)
        gemma_tokenizer.pad_token = gemma_tokenizer.eos_token
        gemma_tokenizer.add_eos_token = True

        # Tokenize the initial response for the gemma model
        inputs = gemma_tokenizer(
            [
                alpaca_prompt.format(
                    f'''
                    Modify the given content for a 5-year-old.
                    Use simple words and phrases.
                    Remove any repetitive information.
                    Keep responses under 50 words.
                    \n\nGiven Content: {initial_response}
                    ''',  # instruction
                    "",  # input
                    "",  # output - leave this blank for generation!
                )
            ], return_tensors="pt").to("cuda")

        # Generate adjusted output using the gemma model
        outputs = gemma_model.generate(**inputs, max_new_tokens=256, temperature=0.6)
        decoded_outputs = gemma_tokenizer.batch_decode(outputs)

        # Extract the adjusted response text
        response_text = decoded_outputs[0]

        # Use regex to find the response section in the output
        match = re.search(r"### Response:(.*?)(?=\n###|$)", response_text, re.DOTALL)
        if match:
            adjusted_response = match.group(1).strip()
        else:
            adjusted_response = ""

        # Return the final adjusted response in a success dictionary
        return {
            'success': True,
            'response': {
                "result": adjusted_response
            }
        }

    except Exception as e:
        return {'success': False, 'response': str(e)}
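A minimal sketch of calling this route from a client, assuming the server is started from main.py below (which mounts this router under /sugar-ai/chat) and listens on localhost:8000 (an assumption about how uvicorn is launched). Note the nested response shape returned on success.

# Sketch of a client request to the chat route; host and port are assumptions.
import requests

resp = requests.post(
    "http://localhost:8000/sugar-ai/chat/generate_answer",
    json={"query": "Why is the sky blue?"},
)
data = resp.json()
if data["success"]:
    print(data["response"]["result"])  # the adjusted, child-friendly answer
else:
    print("Error:", data["response"])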

53 changes: 26 additions & 27 deletions main.py
@@ -1,27 +1,26 @@

from transformers import GPT2Tokenizer, GPT2LMHeadModel


# We should rename this
class AI_Test:
    def __init__(self):
        pass

    def generate_bot_response(self, question):
        tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
        model = GPT2LMHeadModel.from_pretrained("distilgpt2")

        prompt = '''
        Your task is to answer children's questions using simple language.
        Explain any difficult words in a way a 3-year-old can understand.
        Keep responses under 60 words.
        \n\nQuestion:
        '''

        input_text = prompt + question

        inputs = tokenizer.encode(input_text, return_tensors='pt')
        outputs = model.generate(inputs, max_length=150, num_return_sequences=1)
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        return answer
import os
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from chat.router import router as chat_router
# from piggy.router import router as piggy_router

# Create a FastAPI application instance with custom documentation URL
app = FastAPI(
    docs_url="/sugar-ai/docs",
)

# Include the chat router with a specified prefix for endpoint paths
app.include_router(chat_router, prefix="/sugar-ai/chat")
# Include the piggy router with a specified prefix for endpoint paths (currently commented out)
# app.include_router(piggy_router, prefix="/sugar-ai/piggy")

# Add CORS middleware to allow cross-origin requests from any origin
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow requests from any origin
    allow_credentials=True,  # Allow sending of credentials (e.g., cookies)
    allow_methods=["*"],  # Allow all HTTP methods
    allow_headers=["*"],  # Allow all headers
)
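The added lines import os and uvicorn but never use them; a minimal sketch of a launch block that would use both (an assumption, not part of this PR; host, port, and the PORT environment variable are illustrative):

# Not part of this PR: one possible way to start the app with uvicorn.
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))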
27 changes: 27 additions & 0 deletions original_main.py
@@ -0,0 +1,27 @@

from transformers import GPT2Tokenizer, GPT2LMHeadModel


# We should rename this
class AI_Test:
    def __init__(self):
        pass

    def generate_bot_response(self, question):
        tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
        model = GPT2LMHeadModel.from_pretrained("distilgpt2")

        prompt = '''
        Your task is to answer children's questions using simple language.
        Explain any difficult words in a way a 3-year-old can understand.
        Keep responses under 60 words.
        \n\nQuestion:
        '''

        input_text = prompt + question

        inputs = tokenizer.encode(input_text, return_tensors='pt')
        outputs = model.generate(inputs, max_length=150, num_return_sequences=1)
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        return answer