diff --git a/MIstral7B/Rag-test.py b/MIstral7B/Rag-test.py
new file mode 100644
index 0000000..9d36902
--- /dev/null
+++ b/MIstral7B/Rag-test.py
@@ -0,0 +1,59 @@
+from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.llms.llama_cpp import LlamaCPP
+
+# --- Configuration ---
+# Point to your downloaded model file
+MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # <-- IMPORTANT: update this path
+
+# --- 1. Load the LLM (our quantized Mistral model) ---
+# This uses llama-cpp-python to run the GGUF model locally
+llm = LlamaCPP(
+    model_path=MODEL_PATH,
+    # Model parameters - you can adjust these
+    temperature=0.1,
+    max_new_tokens=512,
+    context_window=3900,  # The model's context window size
+    generate_kwargs={},
+    model_kwargs={
+        "n_gpu_layers": -1
+    },  # -1 offloads all layers to the GPU; set to 0 for CPU-only
+    verbose=True,
+)
+
+# --- 2. Configure the Embedding Model ---
+# This model creates numerical representations of your text for retrieval.
+# It runs locally on your machine.
+embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+# --- 3. Set up Global Settings ---
+# This tells LlamaIndex to use our chosen models.
+Settings.llm = llm
+Settings.embed_model = embed_model
+
+# --- 4. Load Your Data ---
+# This will load all files from the 'data' directory.
+print("Loading documents...")
+documents = SimpleDirectoryReader("D:/Mistral7B/data").load_data()
+print(f"Loaded {len(documents)} document(s).")
+
+# --- 5. Create the Index and Query Engine ---
+# The VectorStoreIndex will process your documents and build a searchable index.
+# The query engine connects the retriever (finds relevant text) with the LLM (generates answers).
+print("Indexing documents... (this may take a moment)")
+index = VectorStoreIndex.from_documents(documents)
+query_engine = index.as_query_engine(streaming=True)
+
+# --- 6. Start Querying ---
+print("\n--- Query Engine Ready ---")
+while True:
+    query = input("Ask a question about your documents: ")
+    if query.lower() == "exit":
+        break
+
+    response_stream = query_engine.query(query)
+
+    print("\nAssistant: ", end="")
+    # Stream the response to the console
+    response_stream.print_response_stream()
+    print("\n" + "-" * 50)
diff --git a/MIstral7B/With-Eval.py b/MIstral7B/With-Eval.py
new file mode 100644
index 0000000..eb5de6a
--- /dev/null
+++ b/MIstral7B/With-Eval.py
@@ -0,0 +1,179 @@
+import time
+
+from codecarbon import OfflineEmissionsTracker
+from datasets import Dataset
+from langchain_community.llms import LlamaCpp
+from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.llms.llama_cpp import LlamaCPP
+from ragas import evaluate
+from ragas.llms import LangchainLLMWrapper
+from ragas.metrics import answer_relevancy, faithfulness
+
+# --- 1. Configuration ---
+
+# Set the path to your downloaded GGUF model
+MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # <-- IMPORTANT: Update this path if needed
+
+# Set the path to your data (PDFs, .txt, etc.)
+DATA_PATH = "D:/Mistral7B/data"  # <-- IMPORTANT: Update this path if needed
+
+# Set your country's ISO code for CodeCarbon
+# Find your code: https://en.wikipedia.org/wiki/List_of_ISO_3166-1_alpha-3_codes
+# Using "EGY" for Egypt as an example
+YOUR_COUNTRY_ISO_CODE = "EGY"
+
+# Define your "Golden Set" of test questions
+TEST_QUESTIONS = [
+    "What is the main topic of the document?",
+    # "Summarize the key findings in three bullet points.",
+    # ... add 10-15 more of your own questions ...
+    # "What is [a specific term] according to the text?",
+    # "What conclusion does the author reach?",
+]
+
+# --- 2. Initialize Models ---
+
+print("Initializing models...")
+
+# Load the local LLM (Mistral 7B)
+llm = LlamaCPP(
+    model_path=MODEL_PATH,
+    temperature=0.1,
+    max_new_tokens=512,
+    context_window=3900,
+    generate_kwargs={},
+    model_kwargs={"n_gpu_layers": 1},  # Layers to offload to the GPU (0 = CPU-only, -1 = all)
+    verbose=True,
+)
+
+# Load the local Embedding Model
+embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+# Set up LlamaIndex global settings
+Settings.llm = llm
+Settings.embed_model = embed_model
+
+# --- 3. Load & Index Documents ---
+
+print("Loading documents...")
+documents = SimpleDirectoryReader(DATA_PATH).load_data()
+print(f"Loaded {len(documents)} document(s).")
+
+print("Indexing documents... (this may take a moment)")
+index = VectorStoreIndex.from_documents(documents)
+query_engine = index.as_query_engine()
+print("Indexing complete.")
+
+# --- 4. Run Evaluation (Accuracy & Environmental Cost) ---
+
+# Initialize a dict of lists to hold our evaluation data
+eval_data = {
+    "question": [],
+    "answer": [],
+    "contexts": [],
+}
+
+# Initialize the CO2 Emissions Tracker
+print(f"\nInitializing CodeCarbon tracker for country: {YOUR_COUNTRY_ISO_CODE}")
+tracker = OfflineEmissionsTracker(country_iso_code=YOUR_COUNTRY_ISO_CODE)
+tracker.start()
+
+print("\n--- Starting Evaluation Loop ---")
+
+try:
+    for query in TEST_QUESTIONS:
+        print(f"\nQuerying: {query}")
+
+        # --- Start tracking for this specific query ---
+        tracker.start_task("RAG Query")
+        start_time = time.time()
+
+        # Run the query
+        response = query_engine.query(query)
+
+        # --- Stop tracking for this query ---
+        end_time = time.time()
+        # stop_task() returns an EmissionsData OBJECT
+        emissions_data = tracker.stop_task()
+
+        # Collect results for ragas
+        answer = str(response)
+        contexts = [node.get_content() for node in response.source_nodes]
+
+        eval_data["question"].append(query)
+        eval_data["answer"].append(answer)
+        eval_data["contexts"].append(contexts)
+
+        # --- Print Results for this Query ---
+        print(f"Answer: {answer}")
+        print("-" * 30)
+        print(f"Latency: {end_time - start_time:.2f} seconds")
+
+        # --- CORRECTED LINES ---
+        # Access attributes using dot notation
+        print(f"Emissions: {emissions_data.emissions * 1000:.6f} gCO2eq")
+        print(f"Energy: {emissions_data.energy_consumed * 1000:.6f} Wh")
+        # --- END OF CORRECTION ---
+
+        print("=" * 50)
+
+finally:
+    # --- CORRECTED LINES ---
+    # stop() returns a FLOAT (total_emissions_kg)
+    total_emissions_kg = tracker.stop()
+    print("\n--- Total Emissions Summary (Saved to emissions.csv) ---")
+    # Access total energy from the tracker object itself
+    print(
+        f"Total Energy Consumed: {tracker.final_emissions_data.energy_consumed * 1000:.4f} Wh"
+    )
+    print(f"Total CO2 Emitted: {total_emissions_kg * 1000:.4f} gCO2eq")
+    # --- END OF CORRECTION ---
+
+
+# --- 5. Run Ragas Accuracy Evaluation ---
+
+print("\n--- Running Ragas Accuracy Evaluation ---")
+
+# Convert your collected data into a Hugging Face Dataset object
+eval_dataset = Dataset.from_dict(eval_data)
+
+# --- Set up the Ragas evaluator to use YOUR local model ---
+# We must wrap our local model for Ragas to use it as a judge.
+# The easiest way is to use the Langchain wrapper.
+
+# 1. Import the required LangChain and Ragas wrapper classes
+# You may need to run: pip install langchain-community
+
+
+# 2. Create a new LangChain LlamaCpp object *just for evaluation*
+# This points to the same model file.
+eval_llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    n_gpu_layers=1,  # Match your settings from Section 2
+    n_batch=512,  # Match your settings
+    n_ctx=3900,  # Match your settings
+    temperature=0,  # Evaluators should be deterministic
+    verbose=False,
+)
+# 3. Wrap the LangChain object for Ragas
+ragas_llm = LangchainLLMWrapper(eval_llm)
+
+# 4. Run the evaluation, passing the wrapped LLM and embeddings directly
+result = evaluate(
+    eval_dataset,
+    metrics=[
+        faithfulness,
+        answer_relevancy,
+    ],
+    llm=ragas_llm,  # <-- Pass the evaluator LLM here
+    embeddings=embed_model,  # <-- Pass the embeddings here
+)
+
+print("\n--- Ragas Accuracy Results ---")
+print(result)
+
+# The result will be a dictionary like:
+# {'faithfulness': 0.85, 'answer_relevancy': 0.92}
+
+print("\n--- Project Evaluation Complete ---")
diff --git a/MIstral7B/data/Machine Learning.pdf b/MIstral7B/data/Machine Learning.pdf
new file mode 100644
index 0000000..7ce771e
Binary files /dev/null and b/MIstral7B/data/Machine Learning.pdf differ
diff --git a/MIstral7B/evaluate_results.py b/MIstral7B/evaluate_results.py
new file mode 100644
index 0000000..9d5b0e2
--- /dev/null
+++ b/MIstral7B/evaluate_results.py
@@ -0,0 +1,119 @@
+import json
+
+from datasets import Dataset
+from langchain_community.llms import LlamaCpp  # Use LangChain's LlamaCpp for evaluation
+from llama_index.embeddings.huggingface import (
+    HuggingFaceEmbedding,
+)  # Need this for embeddings
+from ragas import evaluate
+from ragas.llms import LangchainLLMWrapper
+from ragas.metrics import answer_relevancy, faithfulness
+
+# --- 1. Configuration ---
+MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # Same model used for generation
+INPUT_FILE = "D:/Mistral7B/rag_results.json"  # The file saved by the previous script
+
+# --- 2. Load the Saved Results ---
+print(f"Loading results from {INPUT_FILE}...")
+with open(INPUT_FILE, "r", encoding="utf-8") as f:
+    loaded_data = json.load(f)
+
+# Convert the list of dictionaries into a Hugging Face Dataset
+# Ragas requires columns named 'question', 'answer', 'contexts'
+eval_dataset = Dataset.from_list([loaded_data[0]])  # Only evaluate the first item
+print(f"Loaded {len(eval_dataset)} results.")
+
+# --- 3. Initialize Evaluator Model and Embeddings ---
+print("Initializing evaluator models...")
+# ... (gpu_layers = 0 setting) ...
+
+eval_llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    # ... other parameters ...
+    n_ctx=1024,  # Keep reduced context
+    # ...
+)
+ragas_llm = LangchainLLMWrapper(eval_llm)
+
+# --- ADD THIS TEST BLOCK ---
+print("\n--- Testing eval_llm directly ---")
+try:
+    test_prompt = (
+        "Explain the importance of testing in software development in one sentence."
+    )
+    print(f"Sending test prompt: {test_prompt}")
+    response = eval_llm.invoke(test_prompt)
+    print(f"Test response received: {response}")
+    print("--- eval_llm test successful ---\n")
+except Exception as e:
+    print("--- eval_llm test FAILED ---")
+    print(f"Error during direct invocation: {e}")
+    import traceback
+
+    traceback.print_exc()
+    # Decide if you want to exit here or continue to ragas evaluation
+    # exit()  # Uncomment to stop if the direct test fails
+# --- END OF TEST BLOCK ---
+
+# Ragas metrics might also need embeddings
+embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+# --- 4. Run Ragas Evaluation ---
+print("\n--- Running Ragas Accuracy Evaluation ---")
+
+try:
+    print("Starting Ragas evaluate()...")  # <-- ADDED
+    result = evaluate(
+        eval_dataset,
+        metrics=[
+            faithfulness,
+            answer_relevancy,
+        ],
+        llm=ragas_llm,
+        embeddings=embed_model,
+        # raise_exceptions=False  # Optional: Try adding this if it keeps crashing
+    )
+    print("Ragas evaluate() finished.")  # <-- ADDED
+
+    print("\n--- Ragas Accuracy Results ---")
+    print(result)  # <-- KEEP THIS
+
+    # Save results to a file for later analysis
+    print("Preparing to save results to JSON...")  # <-- ADDED
+    with open("ragas_evaluation_results.json", "w") as f:
+        # Convert numpy values to Python native types for JSON serialization
+        import numpy as np
+
+        # Check if result is not None and is a dictionary before processing
+        if result and isinstance(result, dict):
+            result_dict = {
+                k: float(v)
+                if isinstance(v, (np.number, float)) and not np.isnan(v)
+                else None
+                for k, v in result.items()
+            }
+            print(
+                f"Result dictionary prepared: {result_dict}"
+            )  # <-- ADDED (optional, can be verbose)
+            json.dump(result_dict, f, indent=4)
+            print("Results saved to ragas_evaluation_results.json")  # <-- ADDED
+        else:
+            print(
+                "Evaluation result was None or not a dictionary, skipping save."
+            )  # <-- ADDED
+
+except Exception as e:
+    print("\n--- Evaluation Error ---")
+    print(f"Error during evaluation or saving: {e}")  # <-- MODIFIED
+    import traceback
+
+    traceback.print_exc()
+
+# Make sure to explicitly delete the model to avoid memory issues
+if "eval_llm" in locals():
+    print("Deleting LLM objects...")  # <-- ADDED
+    del ragas_llm
+    del eval_llm
+    print("LLM objects deleted.")  # <-- ADDED
+
+print("\n--- Evaluation Script Finished ---")
diff --git a/MIstral7B/testing-gpu.py b/MIstral7B/testing-gpu.py
new file mode 100644
index 0000000..0c92b10
--- /dev/null
+++ b/MIstral7B/testing-gpu.py
@@ -0,0 +1,20 @@
+from llama_cpp import Llama
+
+# --- Configuration ---
+# IMPORTANT: Update this to the correct path on your machine
+MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+
+print("Attempting to load model with GPU...")
+
+try:
+    llm = Llama(
+        model_path=MODEL_PATH,
+        n_gpu_layers=-1,  # Try to offload all layers to GPU
+        verbose=True,  # Verbose output shows whether layers were actually offloaded
+    )
+    print("\n--- TEST SUCCESSFUL ---")
+    # Check the output above for lines mentioning CUDA or cuBLAS and layer offloading
+
+except Exception as e:
+    print("\n--- TEST FAILED ---")
+    print(f"An error occurred: {e}")
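
Note (not part of the patch): evaluate_results.py loads D:/Mistral7B/rag_results.json, described as "the file saved by the previous script", but none of the scripts in this diff writes that file; With-Eval.py only collects eval_data in memory. A minimal bridging sketch is shown below, assuming the dict-of-lists layout collected in With-Eval.py and the list-of-dicts layout that Dataset.from_list() in evaluate_results.py expects; the helper name, file path, and save point are hypothetical.

# Hypothetical helper to write rag_results.json after the With-Eval.py loop.
import json

def save_rag_results(eval_data, path="D:/Mistral7B/rag_results.json"):
    """Reshape the dict of lists into a list of row dicts with the
    'question', 'answer', 'contexts' keys Ragas expects, then dump to JSON."""
    rows = [
        {"question": q, "answer": a, "contexts": c}
        for q, a, c in zip(
            eval_data["question"], eval_data["answer"], eval_data["contexts"]
        )
    ]
    with open(path, "w", encoding="utf-8") as f:
        json.dump(rows, f, ensure_ascii=False, indent=2)

# Example usage, e.g. right after the try/finally block in With-Eval.py:
# save_rag_results(eval_data)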