59 changes: 59 additions & 0 deletions MIstral7B/Rag-test.py
@@ -0,0 +1,59 @@
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP

# --- Configuration ---
# Point to your downloaded model file
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf" # <-- IMPORTANT: update this path

# --- 1. Load the LLM (our quantized Mistral model) ---
# This uses llama-cpp-python to run the GGUF model locally
llm = LlamaCPP(
    model_path=MODEL_PATH,
    # Model parameters - you can adjust these
    temperature=0.1,
    max_new_tokens=512,
    context_window=3900,  # The model's context window size
    generate_kwargs={},
    # -1 offloads all layers to the GPU (requires a GPU-enabled llama-cpp-python
    # build); set to 0 to run entirely on the CPU.
    model_kwargs={"n_gpu_layers": -1},
    verbose=True,
)

# --- 2. Configure the Embedding Model ---
# This model creates numerical representations of your text for retrieval.
# It runs locally on your machine.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# --- 3. Set up Global Settings ---
# This tells LlamaIndex to use our chosen models.
Settings.llm = llm
Settings.embed_model = embed_model

# --- 4. Load Your Data ---
# This will load all files from the 'data' directory.
print("Loading documents...")
documents = SimpleDirectoryReader("D:/Mistral7B/data").load_data()
print(f"Loaded {len(documents)} document(s).")

# --- 5. Create the Index and Query Engine ---
# The VectorStoreIndex will process your documents and build a searchable index.
# The query engine connects the retriever (finds relevant text) with the LLM (generates answers).
print("Indexing documents... (this may take a moment)")
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine(streaming=True)
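# Note: the index above is rebuilt from scratch on every run. As an optional
# extension (a sketch, not part of the original flow; the persist_dir path is
# illustrative), the index could be saved to disk and reloaded later:
#
#     from llama_index.core import StorageContext, load_index_from_storage
#     index.storage_context.persist(persist_dir="D:/Mistral7B/storage")
#     index = load_index_from_storage(
#         StorageContext.from_defaults(persist_dir="D:/Mistral7B/storage")
#     )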

# --- 6. Start Querying ---
print("\n--- Query Engine Ready ---")
while True:
    query = input("Ask a question about your documents: ")
    if query.lower() == "exit":
        break

    response_stream = query_engine.query(query)

    print("\nAssistant: ", end="")
    # Stream the response to the console
    response_stream.print_response_stream()
    print("\n" + "-" * 50)
179 changes: 179 additions & 0 deletions MIstral7B/With-Eval.py
@@ -0,0 +1,179 @@
import time

from codecarbon import OfflineEmissionsTracker
from datasets import Dataset
from langchain_community.llms import LlamaCpp
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import answer_relevancy, faithfulness

# --- 1. Configuration ---

# Set the path to your downloaded GGUF model
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf" # <-- IMPORTANT: Update this path if needed

# Set the path to your data (PDFs, .txt, etc.)
DATA_PATH = "D:/Mistral7B/data" # <-- IMPORTANT: Update this path if needed

# Set your country's ISO code for CodeCarbon
# Find your code: https://en.wikipedia.org/wiki/List_of_ISO_3166-1_alpha-3_codes
# Using "EGY" for Egypt as an example
YOUR_COUNTRY_ISO_CODE = "EGY"

# Define your "Golden Set" of test questions
TEST_QUESTIONS = [
    "What is the main topic of the document?",
    # "Summarize the key findings in three bullet points.",
    # ... add 10-15 more of your own questions ...
    # "What is [a specific term] according to the text?",
    # "What conclusion does the author reach?",
]

# --- 2. Initialize Models ---

print("Initializing models...")

# Load the local LLM (Mistral 7B)
llm = LlamaCPP(
    model_path=MODEL_PATH,
    temperature=0.1,
    max_new_tokens=512,
    context_window=3900,
    generate_kwargs={},
    # Number of layers to offload to the GPU: 0 = CPU only, -1 = all layers
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)

# Load the local Embedding Model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Set up LlamaIndex global settings
Settings.llm = llm
Settings.embed_model = embed_model

# --- 3. Load & Index Documents ---

print("Loading documents...")
documents = SimpleDirectoryReader(DATA_PATH).load_data()
print(f"Loaded {len(documents)} document(s).")

print("Indexing documents... (this may take a moment)")
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
print("Indexing complete.")

# --- 4. Run Evaluation (Accuracy & Environmental Cost) ---

# Initialize a list to hold our evaluation data
eval_data = {
    "question": [],
    "answer": [],
    "contexts": [],
}

# Initialize the CO2 Emissions Tracker
print(f"\nInitializing CodeCarbon tracker for country: {YOUR_COUNTRY_ISO_CODE}")
tracker = OfflineEmissionsTracker(country_iso_code=YOUR_COUNTRY_ISO_CODE)
tracker.start()

print("\n--- Starting Evaluation Loop ---")

try:
    for query in TEST_QUESTIONS:
        print(f"\nQuerying: {query}")

        # --- Start tracking for this specific query ---
        tracker.start_task("RAG Query")
        start_time = time.time()

        # Run the query
        response = query_engine.query(query)

        # --- Stop tracking for this query ---
        end_time = time.time()
        # stop_task() returns an EmissionsData OBJECT
        emissions_data = tracker.stop_task()

        # Collect results for ragas
        answer = str(response)
        contexts = [node.get_content() for node in response.source_nodes]

        eval_data["question"].append(query)
        eval_data["answer"].append(answer)
        eval_data["contexts"].append(contexts)

        # --- Print Results for this Query ---
        print(f"Answer: {answer}")
        print("-" * 30)
        print(f"Latency: {end_time - start_time:.2f} seconds")

        # --- CORRECTED LINES ---
        # Access attributes using dot notation
        print(f"Emissions: {emissions_data.emissions * 1000:.6f} gCO2eq")
        print(f"Energy: {emissions_data.energy_consumed * 1000:.6f} Wh")
        # --- END OF CORRECTION ---

        print("=" * 50)

finally:
    # --- CORRECTED LINES ---
    # stop() returns a FLOAT (total_emissions_kg)
    total_emissions_kg = tracker.stop()
    print("\n--- Total Emissions Summary (Saved to emissions.csv) ---")
    # Access total energy from the tracker object itself
    print(
        f"Total Energy Consumed: {tracker.final_emissions_data.energy_consumed * 1000:.4f} Wh"
    )
    print(f"Total CO2 Emitted: {total_emissions_kg * 1000:.4f} gCO2eq")
    # --- END OF CORRECTION ---
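
# Optional hand-off (a sketch, not part of the original script): persist the
# collected question/answer/contexts records so a separate run of
# evaluate_results.py can re-score them without re-running the RAG pipeline.
# The filename and the list-of-dicts layout are assumptions based on what that
# script loads with Dataset.from_list().
import json

with open("D:/Mistral7B/rag_results.json", "w", encoding="utf-8") as f:
    json.dump(
        [
            {"question": q, "answer": a, "contexts": c}
            for q, a, c in zip(
                eval_data["question"], eval_data["answer"], eval_data["contexts"]
            )
        ],
        f,
        ensure_ascii=False,
        indent=2,
    )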


# --- 5. Run Ragas Accuracy Evaluation ---

print("\n--- Running Ragas Accuracy Evaluation ---")

# Convert your collected data into a Hugging Face Dataset object
eval_dataset = Dataset.from_dict(eval_data)

# --- Set up the Ragas evaluator to use YOUR local model ---
# We must wrap our local model for Ragas to use it as a judge.
# The easiest way is to use the Langchain wrapper.

# 1. Import the required LangChain and Ragas wrapper classes
# You may need to run: pip install langchain-community


# 2. Create a new LangChain LlamaCpp object *just for evaluation*
# This points to the same model file.
eval_llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_gpu_layers=1,  # Match your settings from Section 2
    n_batch=512,  # Match your settings
    n_ctx=3900,  # Match your settings
    temperature=0,  # Evaluators should be deterministic
    verbose=False,
)
# 3. Wrap the LangChain object for Ragas
ragas_llm = LangchainLLMWrapper(eval_llm)

# 4. Run the evaluation, passing the wrapped LLM and embeddings directly
result = evaluate(
    eval_dataset,
    metrics=[
        faithfulness,
        answer_relevancy,
    ],
    llm=ragas_llm,  # <-- Pass the evaluator LLM here
    embeddings=embed_model,  # <-- Pass the embeddings here
)

print("\n--- Ragas Accuracy Results ---")
print(result)

# The printed result will look something like:
# {'faithfulness': 0.85, 'answer_relevancy': 0.92}
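# Optional (a sketch, assuming the installed ragas version exposes
# Result.to_pandas()): dump per-question scores alongside the aggregate numbers.
# df = result.to_pandas()
# df.to_csv("ragas_per_question_scores.csv", index=False)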

print("\n--- Project Evaluation Complete ---")
Binary file added MIstral7B/data/Machine Learning.pdf
Binary file not shown.
119 changes: 119 additions & 0 deletions MIstral7B/evaluate_results.py
@@ -0,0 +1,119 @@
import json

from datasets import Dataset
from langchain_community.llms import LlamaCpp # Use LangChain's LlamaCpp for evaluation
from llama_index.embeddings.huggingface import (
HuggingFaceEmbedding,
) # Need this for embeddings
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import answer_relevancy, faithfulness

# --- 1. Configuration ---
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf" # Same model used for generation
INPUT_FILE = "D:/Mistral7B/rag_results.json" # The file saved by the previous script
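# Expected shape of rag_results.json (an illustrative example, not the actual
# file contents): a list of records carrying the columns Ragas needs.
# [
#   {
#     "question": "What is the main topic of the document?",
#     "answer": "The document is about ...",
#     "contexts": ["...retrieved chunk 1...", "...retrieved chunk 2..."]
#   }
# ]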

# --- 2. Load the Saved Results ---
print(f"Loading results from {INPUT_FILE}...")
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    loaded_data = json.load(f)

# Convert the loaded records into a Hugging Face Dataset.
# Ragas requires columns named 'question', 'answer', and 'contexts'.
# Only the first record is evaluated here to keep the run short; pass the full
# list (Dataset.from_list(loaded_data)) to score everything.
eval_dataset = Dataset.from_list([loaded_data[0]])
print(f"Loaded {len(eval_dataset)} results.")

# --- 3. Initialize Evaluator Model and Embeddings ---
print("Initializing evaluator models...")
# ... (gpu_layers = 0 setting) ...

eval_llm = LlamaCpp(
    model_path=MODEL_PATH,
    # ... other parameters ...
    n_ctx=1024,  # Keep reduced context
    # ...
)
ragas_llm = LangchainLLMWrapper(eval_llm)

# --- ADD THIS TEST BLOCK ---
print("\n--- Testing eval_llm directly ---")
try:
    test_prompt = (
        "Explain the importance of testing in software development in one sentence."
    )
    print(f"Sending test prompt: {test_prompt}")
    response = eval_llm.invoke(test_prompt)
    print(f"Test response received: {response}")
    print("--- eval_llm test successful ---\n")
except Exception as e:
    print("--- eval_llm test FAILED ---")
    print(f"Error during direct invocation: {e}")
    import traceback

    traceback.print_exc()
    # Decide if you want to exit here or continue to ragas evaluation
    # exit()  # Uncomment to stop if the direct test fails
# --- END OF TEST BLOCK ---

# Ragas metrics might also need embeddings
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# --- 4. Run Ragas Evaluation ---
print("\n--- Running Ragas Accuracy Evaluation ---")

try:
    print("Starting Ragas evaluate()...")  # <-- ADDED
    result = evaluate(
        eval_dataset,
        metrics=[
            faithfulness,
            answer_relevancy,
        ],
        llm=ragas_llm,
        embeddings=embed_model,
        # raise_exceptions=False  # Optional: Try adding this if it keeps crashing
    )
    print("Ragas evaluate() finished.")  # <-- ADDED

    print("\n--- Ragas Accuracy Results ---")
    print(result)  # <-- KEEP THIS

    # Save results to a file for later analysis
    print("Preparing to save results to JSON...")  # <-- ADDED
    with open("ragas_evaluation_results.json", "w") as f:
        # Convert numpy values to Python native types for JSON serialization
        import numpy as np

        # Check if result is not None and is a dictionary before processing
        if result and isinstance(result, dict):
            result_dict = {
                k: float(v)
                if isinstance(v, (np.number, float)) and not np.isnan(v)
                else None
                for k, v in result.items()
            }
            print(
                f"Result dictionary prepared: {result_dict}"
            )  # <-- ADDED (optional, can be verbose)
            json.dump(result_dict, f, indent=4)
            print("Results saved to ragas_evaluation_results.json")  # <-- ADDED
        else:
            print(
                "Evaluation result was None or not a dictionary, skipping save."
            )  # <-- ADDED

except Exception as e:
    print("\n--- Evaluation Error ---")
    print(f"Error during evaluation or saving: {e}")  # <-- MODIFIED
    import traceback

    traceback.print_exc()

# Make sure to explicitly delete the model to avoid memory issues
if "eval_llm" in locals():
print("Deleting LLM objects...") # <-- ADDED
del ragas_llm
del eval_llm
print("LLM objects deleted.") # <-- ADDED

print("\n--- Evaluation Script Finished ---")
20 changes: 20 additions & 0 deletions MIstral7B/testing-gpu.py
@@ -0,0 +1,20 @@
from llama_cpp import Llama

# --- Configuration ---
# IMPORTANT: Update this to the correct path on your machine
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"

print("Attempting to load model with GPU...")

try:
    llm = Llama(
        model_path=MODEL_PATH,
        n_gpu_layers=-1,  # Try to offload all layers to GPU
        verbose=True,  # This is the most important part!
    )
    print("\n--- TEST SUCCESSFUL ---")
    # Check the output above for lines mentioning CUDA or cuBLAS and layer offloading
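    # Optional sanity check (a sketch; the prompt and token limit are
    # illustrative): run a tiny completion to confirm the offloaded model
    # actually generates text.
    output = llm("Q: What is 2 + 2? A:", max_tokens=16)
    print(output["choices"][0]["text"].strip())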

except Exception as e:
    print("\n--- TEST FAILED ---")
    print(f"An error occurred: {e}")