#!/usr/bin/env python3
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Top-level benchmarking script that automatically discovers and runs all benchmarks
in the ./benches directory, organizing outputs into model-specific subfolders.
"""

import argparse
import importlib.util
import json
import logging
import os
import sys
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Optional


def setup_logging(log_level: str = "INFO", enable_file_logging: bool = False) -> logging.Logger:
    """Setup logging configuration."""
    numeric_level = getattr(logging, log_level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {log_level}")

    handlers = [logging.StreamHandler(sys.stdout)]

    if enable_file_logging:
        handlers.append(logging.FileHandler(f"benchmark_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"))

    logging.basicConfig(
        level=numeric_level, format="[%(levelname)s - %(asctime)s] %(name)s: %(message)s", handlers=handlers
    )

    return logging.getLogger(__name__)


def discover_benchmarks(benches_dir: str) -> list[dict[str, Any]]:
    """
    Discover all benchmark modules in the benches directory.

    Returns:
        List of dictionaries containing benchmark module info
    """
    benchmarks = []
    benches_path = Path(benches_dir)

    if not benches_path.exists():
        raise FileNotFoundError(f"Benches directory not found: {benches_dir}")

    for py_file in benches_path.glob("*.py"):
        if py_file.name.startswith("__"):
            continue

        module_name = py_file.stem

        try:
            # Import the module
            spec = importlib.util.spec_from_file_location(module_name, py_file)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            # Check if it has a benchmark runner function
            if hasattr(module, f"run_{module_name}"):
                benchmarks.append(
                    {
                        "name": module_name,
                        "path": str(py_file),
                        "module": module,
                        "runner_function": getattr(module, f"run_{module_name}"),
                    }
                )
            elif hasattr(module, "run_benchmark"):
                benchmarks.append(
                    {
                        "name": module_name,
                        "path": str(py_file),
                        "module": module,
                        "runner_function": getattr(module, "run_benchmark"),
                    }
                )
            else:
                logging.warning(f"No runner function found in {py_file}")

        except Exception as e:
            logging.error(f"Failed to import {py_file}: {e}")

    return benchmarks


def run_single_benchmark(
    benchmark_info: dict[str, Any], output_dir: str, logger: logging.Logger, **kwargs
) -> Optional[str]:
    """
    Run a single benchmark and return the output file path.

    Args:
        benchmark_info: Dictionary containing benchmark module info
        output_dir: Base output directory
        logger: Logger instance
        **kwargs: Additional arguments to pass to the benchmark

    Returns:
        Path to the output file if successful, None otherwise
    """
    benchmark_name = benchmark_info["name"]
    runner_func = benchmark_info["runner_function"]

    logger.info(f"Running benchmark: {benchmark_name}")

    try:
        # Check function signature to determine what arguments to pass
        import inspect

        sig = inspect.signature(runner_func)

        # Prepare arguments based on function signature
        func_kwargs = {"logger": logger, "output_dir": output_dir}

        # Add other kwargs if the function accepts them
        for param_name in sig.parameters:
            if param_name in kwargs:
                func_kwargs[param_name] = kwargs[param_name]

        # Filter kwargs to only include parameters the function accepts
        # If function has **kwargs, include all provided kwargs
        has_var_kwargs = any(param.kind == param.VAR_KEYWORD for param in sig.parameters.values())
        if has_var_kwargs:
            valid_kwargs = {**func_kwargs, **kwargs}
        else:
            valid_kwargs = {k: v for k, v in func_kwargs.items() if k in sig.parameters}

        # Run the benchmark
        result = runner_func(**valid_kwargs)

        if isinstance(result, str):
            # Function returned a file path
            return result
        else:
            logger.info(f"Benchmark {benchmark_name} completed successfully")
            return "completed"

    except Exception as e:
        logger.error(f"Benchmark {benchmark_name} failed: {e}")
        import traceback

        logger.debug(traceback.format_exc())
        return None


def generate_summary_report(
    output_dir: str,
    benchmark_results: dict[str, Any],
    logger: logging.Logger,
    benchmark_run_uuid: Optional[str] = None,
) -> str:
    """Generate a summary report of all benchmark runs."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.json")

    summary_data = {
        "run_metadata": {
            "timestamp": datetime.utcnow().isoformat(),
            "benchmark_run_uuid": benchmark_run_uuid,
            "total_benchmarks": len(benchmark_results),
            "successful_benchmarks": len([r for r in benchmark_results.values() if r is not None]),
            "failed_benchmarks": len([r for r in benchmark_results.values() if r is None]),
        },
        "benchmark_results": benchmark_results,
        "output_directory": output_dir,
    }

    with open(summary_file, "w") as f:
        json.dump(summary_data, f, indent=2, default=str)

    logger.info(f"Summary report saved to: {summary_file}")
    return summary_file


def upload_results_to_hf_dataset(
    output_dir: str,
    summary_file: str,
    dataset_name: str,
    run_id: Optional[str] = None,
    token: Optional[str] = None,
    logger: Optional[logging.Logger] = None,
) -> Optional[str]:
    """
    Upload benchmark results to a HuggingFace Dataset.
    Based on upload_collated_report() from utils/collated_reports.py
    Args:
        output_dir: Local output directory containing results
        summary_file: Path to the summary file
        dataset_name: Name of the HuggingFace dataset to upload to
        run_id: Unique run identifier (if None, will generate one)
        token: HuggingFace token for authentication (if None, will use environment variables)
        logger: Logger instance
    Returns:
        The run_id used for the upload, None if upload failed
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    import os

    from huggingface_hub import HfApi

    api = HfApi()

    if run_id is None:
        github_run_number = os.getenv("GITHUB_RUN_NUMBER")
        github_run_id = os.getenv("GITHUB_RUN_ID")
        if github_run_number and github_run_id:
            run_id = f"{github_run_number}-{github_run_id}"

    date_folder = datetime.now().strftime("%Y-%m-%d")

    github_event_name = os.getenv("GITHUB_EVENT_NAME")
    if github_event_name != "schedule":
        # Non-scheduled runs go under a runs subfolder
        repo_path = f"{date_folder}/runs/{run_id}/benchmark_results"
    else:
        # Scheduled runs go directly under the date
        repo_path = f"{date_folder}/{run_id}/benchmark_results"

    logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")

    try:
        # Upload all files in the output directory
        from pathlib import Path

        output_path = Path(output_dir)

        for file_path in output_path.rglob("*"):
            if file_path.is_file():
                # Calculate relative path from output_dir
                relative_path = file_path.relative_to(output_path)
                path_in_repo = f"{repo_path}/{relative_path}"

                logger.debug(f"Uploading {file_path} to {path_in_repo}")

                api.upload_file(
                    path_or_fileobj=str(file_path),
                    path_in_repo=path_in_repo,
                    repo_id=dataset_name,
                    repo_type="dataset",
                    token=token,
                    commit_message=f"Upload benchmark results for run {run_id}",
                )

        logger.info(
            f"Successfully uploaded results to: https://huggingface.co/datasets/{dataset_name}/tree/main/{repo_path}"
        )

        return run_id

    except Exception as upload_error:
        logger.error(f"Failed to upload results: {upload_error}")
        import traceback

        logger.debug(traceback.format_exc())
        return None


def main():
    """Main entry point for the benchmarking script."""
    # Generate a unique UUID for this benchmark run
    benchmark_run_uuid = str(uuid.uuid4())[:8]

    parser = argparse.ArgumentParser(
        description="Run all benchmarks in the ./benches directory",
        epilog="""
Examples:
  # Run all available benchmarks
  python3 run_benchmarks.py
  
  # Run with specific model and upload to HuggingFace Dataset
  python3 run_benchmarks.py --model-id meta-llama/Llama-2-7b-hf --upload-to-hf username/benchmark-results
  
  # Run with custom run ID and upload to HuggingFace Dataset
  python3 run_benchmarks.py --run-id experiment_v1 --upload-to-hf org/benchmarks
  
  # Run only specific benchmarks with file logging
  python3 run_benchmarks.py --include llama --enable-file-logging
        """,  # noqa: W293
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="benchmark_results",
        help="Base output directory for benchmark results (default: benchmark_results)",
    )

    parser.add_argument(
        "--benches-dir",
        type=str,
        default="./benches",
        help="Directory containing benchmark implementations (default: ./benches)",
    )

    parser.add_argument(
        "--log-level",
        type=str,
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="Logging level (default: INFO)",
    )

    parser.add_argument("--model-id", type=str, help="Specific model ID to benchmark (if supported by benchmarks)")

    parser.add_argument("--warmup-iterations", type=int, default=3, help="Number of warmup iterations (default: 3)")

    parser.add_argument(
        "--measurement-iterations", type=int, default=5, help="Number of measurement iterations (default: 5)"
    )

    parser.add_argument(
        "--num-tokens-to-generate",
        type=int,
        default=100,
        help="Number of tokens to generate in benchmarks (default: 100)",
    )

    parser.add_argument("--include", type=str, nargs="*", help="Only run benchmarks matching these names")

    parser.add_argument("--exclude", type=str, nargs="*", help="Exclude benchmarks matching these names")

    parser.add_argument("--enable-file-logging", action="store_true", help="Enable file logging (disabled by default)")

    parser.add_argument(
        "--commit-id", type=str, help="Git commit ID for metadata (if not provided, will auto-detect from git)"
    )

    parser.add_argument(
        "--push-to-hub",
        type=str,
        help="Upload results to HuggingFace Dataset (provide dataset name, e.g., 'username/benchmark-results')",
    )

    parser.add_argument(
        "--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)"
    )

    parser.add_argument(
        "--token",
        type=str,
        help="HuggingFace token for dataset uploads (if not provided, will use HF_TOKEN environment variable)",
    )

    args = parser.parse_args()

    # Setup logging
    logger = setup_logging(args.log_level, args.enable_file_logging)

    logger.info("Starting benchmark discovery and execution")
    logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
    logger.info(f"Output directory: {args.output_dir}")
    logger.info(f"Benches directory: {args.benches_dir}")

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    try:
        # Discover benchmarks
        benchmarks = discover_benchmarks(args.benches_dir)
        logger.info(f"Discovered {len(benchmarks)} benchmark(s): {[b['name'] for b in benchmarks]}")

        if not benchmarks:
            logger.warning("No benchmarks found!")
            return 1

        # Filter benchmarks based on include/exclude
        filtered_benchmarks = benchmarks

        if args.include:
            filtered_benchmarks = [
                b for b in filtered_benchmarks if any(pattern in b["name"] for pattern in args.include)
            ]
            logger.info(f"Filtered to include: {[b['name'] for b in filtered_benchmarks]}")

        if args.exclude:
            filtered_benchmarks = [
                b for b in filtered_benchmarks if not any(pattern in b["name"] for pattern in args.exclude)
            ]
            logger.info(f"After exclusion: {[b['name'] for b in filtered_benchmarks]}")

        if not filtered_benchmarks:
            logger.warning("No benchmarks remaining after filtering!")
            return 1

        # Prepare common kwargs for benchmarks
        benchmark_kwargs = {
            "warmup_iterations": args.warmup_iterations,
            "measurement_iterations": args.measurement_iterations,
            "num_tokens_to_generate": args.num_tokens_to_generate,
        }

        if args.model_id:
            benchmark_kwargs["model_id"] = args.model_id

        # Add commit_id if provided
        if args.commit_id:
            benchmark_kwargs["commit_id"] = args.commit_id

        # Run benchmarks
        benchmark_results = {}
        successful_count = 0

        for benchmark_info in filtered_benchmarks:
            result = run_single_benchmark(benchmark_info, args.output_dir, logger, **benchmark_kwargs)

            benchmark_results[benchmark_info["name"]] = result

            if result is not None:
                successful_count += 1

        # Generate summary report
        summary_file = generate_summary_report(args.output_dir, benchmark_results, logger, benchmark_run_uuid)

        # Upload results to HuggingFace Dataset if requested
        upload_run_id = None
        if args.push_to_hub:
            logger.info("=" * 60)
            logger.info("UPLOADING TO HUGGINGFACE DATASET")
            logger.info("=" * 60)
            # Use provided run_id or fallback to benchmark run UUID
            effective_run_id = args.run_id or benchmark_run_uuid
            upload_run_id = upload_results_to_hf_dataset(
                output_dir=args.output_dir,
                summary_file=summary_file,
                dataset_name=args.push_to_hub,
                run_id=effective_run_id,
                token=args.token,
                logger=logger,
            )
            if upload_run_id:
                logger.info(f"Upload completed with run ID: {upload_run_id}")
            else:
                logger.warning("Upload failed - continuing with local results")

        # Final summary
        total_benchmarks = len(filtered_benchmarks)
        failed_count = total_benchmarks - successful_count

        logger.info("=" * 60)
        logger.info("BENCHMARK RUN SUMMARY")
        logger.info("=" * 60)
        logger.info(f"Total benchmarks: {total_benchmarks}")
        logger.info(f"Successful: {successful_count}")
        logger.info(f"Failed: {failed_count}")
        logger.info(f"Output directory: {args.output_dir}")
        logger.info(f"Summary report: {summary_file}")

        if args.push_to_hub:
            if upload_run_id:
                logger.info(f"HuggingFace Dataset: {args.push_to_hub}")
                logger.info(f"Run ID: {upload_run_id}")
                logger.info(
                    f"View results: https://huggingface.co/datasets/{args.push_to_hub}/tree/main/{datetime.now().strftime('%Y-%m-%d')}/runs/{upload_run_id}"
                )
            else:
                logger.warning("Upload to HuggingFace Dataset failed")

        if failed_count > 0:
            logger.warning(f"{failed_count} benchmark(s) failed. Check logs for details.")
            return 1
        else:
            logger.info("All benchmarks completed successfully!")
            return 0

    except Exception as e:
        logger.error(f"Benchmark run failed: {e}")
        import traceback

        logger.debug(traceback.format_exc())
        return 1


if __name__ == "__main__":
    sys.exit(main())