59 changes: 59 additions & 0 deletions MIstral7B/Rag-test.py
@@ -0,0 +1,59 @@
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP

# --- Configuration ---
# Point to your downloaded model file
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf" # <-- IMPORTANT: update this path

# --- 1. Load the LLM (our quantized Mistral model) ---
# This uses llama-cpp-python to run the GGUF model locally
llm = LlamaCPP(
    model_path=MODEL_PATH,
    # Model parameters - you can adjust these
    temperature=0.1,
    max_new_tokens=512,
    context_window=3900,  # The model's context window size
    generate_kwargs={},
    # -1 offloads all layers to the GPU (requires a GPU-enabled llama-cpp-python
    # build); set to 0 to run entirely on the CPU.
    model_kwargs={"n_gpu_layers": -1},
    verbose=True,
)

# --- 2. Configure the Embedding Model ---
# This model creates numerical representations of your text for retrieval.
# It runs locally on your machine.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# --- 3. Set up Global Settings ---
# This tells LlamaIndex to use our chosen models.
Settings.llm = llm
Settings.embed_model = embed_model

# --- 4. Load Your Data ---
# This will load all files from the 'data' directory.
print("Loading documents...")
documents = SimpleDirectoryReader("D:/Mistral7B/data").load_data()
print(f"Loaded {len(documents)} document(s).")

# --- 5. Create the Index and Query Engine ---
# The VectorStoreIndex will process your documents and build a searchable index.
# The query engine connects the retriever (finds relevant text) with the LLM (generates answers).
print("Indexing documents... (this may take a moment)")
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine(streaming=True)
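# Note: the index above is rebuilt from scratch on every run. As an optional
# extension (a sketch, not part of the original flow; the persist_dir path is
# illustrative), the index could be saved to disk and reloaded later:
#
#     from llama_index.core import StorageContext, load_index_from_storage
#     index.storage_context.persist(persist_dir="D:/Mistral7B/storage")
#     index = load_index_from_storage(
#         StorageContext.from_defaults(persist_dir="D:/Mistral7B/storage")
#     )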

# --- 6. Start Querying ---
print("\n--- Query Engine Ready ---")
while True:
    query = input("Ask a question about your documents: ")
    if query.lower() == "exit":
        break

    response_stream = query_engine.query(query)

    print("\nAssistant: ", end="")
    # Stream the response to the console
    response_stream.print_response_stream()
    print("\n" + "-" * 50)
179 changes: 179 additions & 0 deletions MIstral7B/With-Eval.py
@@ -0,0 +1,179 @@
import time

from codecarbon import OfflineEmissionsTracker
from datasets import Dataset
from langchain_community.llms import LlamaCpp
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import answer_relevancy, faithfulness

# --- 1. Configuration ---

# Set the path to your downloaded GGUF model
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf" # <-- IMPORTANT: Update this path if needed

# Set the path to your data (PDFs, .txt, etc.)
DATA_PATH = "D:/Mistral7B/data" # <-- IMPORTANT: Update this path if needed

# Set your country's ISO code for CodeCarbon
# Find your code: https://en.wikipedia.org/wiki/List_of_ISO_3166-1_alpha-3_codes
# Using "EGY" for Egypt as an example
YOUR_COUNTRY_ISO_CODE = "EGY"

# Define your "Golden Set" of test questions
TEST_QUESTIONS = [
    "What is the main topic of the document?",
    # "Summarize the key findings in three bullet points.",
    # ... add 10-15 more of your own questions ...
    # "What is [a specific term] according to the text?",
    # "What conclusion does the author reach?",
]

# --- 2. Initialize Models ---

print("Initializing models...")

# Load the local LLM (Mistral 7B)
llm = LlamaCPP(
    model_path=MODEL_PATH,
    temperature=0.1,
    max_new_tokens=512,
    context_window=3900,
    generate_kwargs={},
    # Number of layers to offload to the GPU: 0 = CPU only, -1 = all layers
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)

# Load the local Embedding Model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Set up LlamaIndex global settings
Settings.llm = llm
Settings.embed_model = embed_model

# --- 3. Load & Index Documents ---

print("Loading documents...")
documents = SimpleDirectoryReader(DATA_PATH).load_data()
print(f"Loaded {len(documents)} document(s).")

print("Indexing documents... (this may take a moment)")
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
print("Indexing complete.")

# --- 4. Run Evaluation (Accuracy & Environmental Cost) ---

# Initialize a list to hold our evaluation data
eval_data = {
    "question": [],
    "answer": [],
    "contexts": [],
}

# Initialize the CO2 Emissions Tracker
print(f"\nInitializing CodeCarbon tracker for country: {YOUR_COUNTRY_ISO_CODE}")
tracker = OfflineEmissionsTracker(country_iso_code=YOUR_COUNTRY_ISO_CODE)
tracker.start()

print("\n--- Starting Evaluation Loop ---")

try:
    for query in TEST_QUESTIONS:
        print(f"\nQuerying: {query}")

        # --- Start tracking for this specific query ---
        tracker.start_task("RAG Query")
        start_time = time.time()

        # Run the query
        response = query_engine.query(query)

        # --- Stop tracking for this query ---
        end_time = time.time()
        # stop_task() returns an EmissionsData OBJECT
        emissions_data = tracker.stop_task()

        # Collect results for ragas
        answer = str(response)
        contexts = [node.get_content() for node in response.source_nodes]

        eval_data["question"].append(query)
        eval_data["answer"].append(answer)
        eval_data["contexts"].append(contexts)

        # --- Print Results for this Query ---
        print(f"Answer: {answer}")
        print("-" * 30)
        print(f"Latency: {end_time - start_time:.2f} seconds")

        # --- CORRECTED LINES ---
        # Access attributes using dot notation
        print(f"Emissions: {emissions_data.emissions * 1000:.6f} gCO2eq")
        print(f"Energy: {emissions_data.energy_consumed * 1000:.6f} Wh")
        # --- END OF CORRECTION ---

        print("=" * 50)

finally:
    # --- CORRECTED LINES ---
    # stop() returns a FLOAT (total_emissions_kg)
    total_emissions_kg = tracker.stop()
    print("\n--- Total Emissions Summary (Saved to emissions.csv) ---")
    # Access total energy from the tracker object itself
    print(
        f"Total Energy Consumed: {tracker.final_emissions_data.energy_consumed * 1000:.4f} Wh"
    )
    print(f"Total CO2 Emitted: {total_emissions_kg * 1000:.4f} gCO2eq")
    # --- END OF CORRECTION ---
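
# Optional hand-off (a sketch, not part of the original script): persist the
# collected question/answer/contexts records so a separate run of
# evaluate_results.py can re-score them without re-running the RAG pipeline.
# The filename and the list-of-dicts layout are assumptions based on what that
# script loads with Dataset.from_list().
import json

with open("D:/Mistral7B/rag_results.json", "w", encoding="utf-8") as f:
    json.dump(
        [
            {"question": q, "answer": a, "contexts": c}
            for q, a, c in zip(
                eval_data["question"], eval_data["answer"], eval_data["contexts"]
            )
        ],
        f,
        ensure_ascii=False,
        indent=2,
    )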


# --- 5. Run Ragas Accuracy Evaluation ---

print("\n--- Running Ragas Accuracy Evaluation ---")

# Convert your collected data into a Hugging Face Dataset object
eval_dataset = Dataset.from_dict(eval_data)

# --- Set up the Ragas evaluator to use YOUR local model ---
# We must wrap our local model for Ragas to use it as a judge.
# The easiest way is to use the Langchain wrapper.

# 1. Import the required LangChain and Ragas wrapper classes
# You may need to run: pip install langchain-community


# 2. Create a new LangChain LlamaCpp object *just for evaluation*
# This points to the same model file.
eval_llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_gpu_layers=1,  # Match your settings from Section 2
    n_batch=512,  # Match your settings
    n_ctx=3900,  # Match your settings
    temperature=0,  # Evaluators should be deterministic
    verbose=False,
)
# 3. Wrap the LangChain object for Ragas
ragas_llm = LangchainLLMWrapper(eval_llm)

# 4. Run the evaluation, passing the wrapped LLM and embeddings directly
result = evaluate(
    eval_dataset,
    metrics=[
        faithfulness,
        answer_relevancy,
    ],
    llm=ragas_llm,  # <-- Pass the evaluator LLM here
    embeddings=embed_model,  # <-- Pass the embeddings here
)

print("\n--- Ragas Accuracy Results ---")
print(result)

# The printed result will look something like:
# {'faithfulness': 0.85, 'answer_relevancy': 0.92}
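# Optional (a sketch, assuming the installed ragas version exposes
# Result.to_pandas()): dump per-question scores alongside the aggregate numbers.
# df = result.to_pandas()
# df.to_csv("ragas_per_question_scores.csv", index=False)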

print("\n--- Project Evaluation Complete ---")
Binary file added MIstral7B/data/Machine Learning.pdf
Binary file not shown.
119 changes: 119 additions & 0 deletions MIstral7B/evaluate_results.py
@@ -0,0 +1,119 @@
import json

from datasets import Dataset
from langchain_community.llms import LlamaCpp # Use LangChain's LlamaCpp for evaluation
from llama_index.embeddings.huggingface import (
HuggingFaceEmbedding,
) # Need this for embeddings
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import answer_relevancy, faithfulness

# --- 1. Configuration ---
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf" # Same model used for generation
INPUT_FILE = "D:/Mistral7B/rag_results.json" # The file saved by the previous script
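# Expected shape of rag_results.json (an illustrative example, not the actual
# file contents): a list of records carrying the columns Ragas needs.
# [
#   {
#     "question": "What is the main topic of the document?",
#     "answer": "The document is about ...",
#     "contexts": ["...retrieved chunk 1...", "...retrieved chunk 2..."]
#   }
# ]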

# --- 2. Load the Saved Results ---
print(f"Loading results from {INPUT_FILE}...")
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    loaded_data = json.load(f)

# Convert the loaded records into a Hugging Face Dataset.
# Ragas requires columns named 'question', 'answer', and 'contexts'.
# Only the first record is evaluated here to keep the run short; pass the full
# list (Dataset.from_list(loaded_data)) to score everything.
eval_dataset = Dataset.from_list([loaded_data[0]])
print(f"Loaded {len(eval_dataset)} results.")

# --- 3. Initialize Evaluator Model and Embeddings ---
print("Initializing evaluator models...")
# ... (gpu_layers = 0 setting) ...

eval_llm = LlamaCpp(
    model_path=MODEL_PATH,
    # ... other parameters ...
    n_ctx=1024,  # Keep reduced context
    # ...
)
ragas_llm = LangchainLLMWrapper(eval_llm)

# --- ADD THIS TEST BLOCK ---
print("\n--- Testing eval_llm directly ---")
try:
    test_prompt = (
        "Explain the importance of testing in software development in one sentence."
    )
    print(f"Sending test prompt: {test_prompt}")
    response = eval_llm.invoke(test_prompt)
    print(f"Test response received: {response}")
    print("--- eval_llm test successful ---\n")
except Exception as e:
    print("--- eval_llm test FAILED ---")
    print(f"Error during direct invocation: {e}")
    import traceback

    traceback.print_exc()
    # Decide if you want to exit here or continue to ragas evaluation
    # exit()  # Uncomment to stop if the direct test fails
# --- END OF TEST BLOCK ---

# Ragas metrics might also need embeddings
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# --- 4. Run Ragas Evaluation ---
print("\n--- Running Ragas Accuracy Evaluation ---")

try:
    print("Starting Ragas evaluate()...")  # <-- ADDED
    result = evaluate(
        eval_dataset,
        metrics=[
            faithfulness,
            answer_relevancy,
        ],
        llm=ragas_llm,
        embeddings=embed_model,
        # raise_exceptions=False  # Optional: Try adding this if it keeps crashing
    )
    print("Ragas evaluate() finished.")  # <-- ADDED

    print("\n--- Ragas Accuracy Results ---")
    print(result)  # <-- KEEP THIS

    # Save results to a file for later analysis
    print("Preparing to save results to JSON...")  # <-- ADDED
    with open("ragas_evaluation_results.json", "w") as f:
        # Convert numpy values to Python native types for JSON serialization
        import numpy as np

        # Check if result is not None and is a dictionary before processing
        if result and isinstance(result, dict):
            result_dict = {
                k: float(v)
                if isinstance(v, (np.number, float)) and not np.isnan(v)
                else None
                for k, v in result.items()
            }
            print(
                f"Result dictionary prepared: {result_dict}"
            )  # <-- ADDED (optional, can be verbose)
            json.dump(result_dict, f, indent=4)
            print("Results saved to ragas_evaluation_results.json")  # <-- ADDED
        else:
            print(
                "Evaluation result was None or not a dictionary, skipping save."
            )  # <-- ADDED

except Exception as e:
    print("\n--- Evaluation Error ---")
    print(f"Error during evaluation or saving: {e}")  # <-- MODIFIED
    import traceback

    traceback.print_exc()

# Make sure to explicitly delete the model to avoid memory issues
if "eval_llm" in locals():
print("Deleting LLM objects...") # <-- ADDED
del ragas_llm
del eval_llm
print("LLM objects deleted.") # <-- ADDED

print("\n--- Evaluation Script Finished ---")
20 changes: 20 additions & 0 deletions MIstral7B/testing-gpu.py
@@ -0,0 +1,20 @@
from llama_cpp import Llama

# --- Configuration ---
# IMPORTANT: Update this to the correct path on your machine
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"

print("Attempting to load model with GPU...")

try:
    llm = Llama(
        model_path=MODEL_PATH,
        n_gpu_layers=-1,  # Try to offload all layers to GPU
        verbose=True,  # This is the most important part!
    )
    print("\n--- TEST SUCCESSFUL ---")
    # Check the output above for lines mentioning CUDA or cuBLAS and layer offloading
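    # Optional sanity check (a sketch; the prompt and token limit are
    # illustrative): run a tiny completion to confirm the offloaded model
    # actually generates text.
    output = llm("Q: What is 2 + 2? A:", max_tokens=16)
    print(output["choices"][0]["text"].strip())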

except Exception as e:
    print("\n--- TEST FAILED ---")
    print(f"An error occurred: {e}")