MADSci Utilization Plots - MADSci Documentation

This notebook generates visualizations for MADSci utilization reports

%pip install seaborn

import warnings
from typing import Optional, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from madsci.client.event_client import EventClient

# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action="ignore")

# Global plotting defaults: whitegrid theme, "husl" palette, canvas size and font size.
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("husl")
plt.rcParams.update({"figure.figsize": (15, 10), "font.size": 12})

# Startup banner confirming the imports above succeeded.
print(
    "MADSci Utilization Visualization Notebook",
    "Libraries loaded successfully",
    sep="\n",
)

# Cell 2: Initialize EventClient and fetch reports
# The client is constructed with no arguments, so whatever defaults
# EventClient defines apply (presumably the server location comes from the
# MADSci settings/environment - TODO confirm). Every report request in this
# notebook goes through this single `client` object.
client = EventClient()

Fetch analysis reports for:

Daily analysis
User analysis
Workcell session analysis

def _fetch_report(label, fetch):
    """Run one report request and return its result, or ``None`` if it fails.

    Each report is requested through its own call so that a failure in one
    request does not discard reports that were fetched successfully.

    Args:
        label: Short report name used in the error message.
        fetch: Zero-argument callable that performs the request.

    Returns:
        Whatever ``fetch`` returns, or ``None`` when it raises.
    """
    try:
        return fetch()
    except Exception as e:  # notebook boundary: report the problem and keep going
        print(f"Error fetching {label} report: {e}")
        return None


def _report_value(report, section, key):
    """Return ``report[section][key]``, or ``"N/A"`` when it cannot be read.

    Tolerates a missing report (``None``), a non-dict report and a missing or
    non-dict section, so printing the summary can never fail.
    """
    if not isinstance(report, dict):
        return "N/A"
    section_data = report.get(section)
    if not isinstance(section_data, dict):
        return "N/A"
    return section_data.get(key, "N/A")


# Periods report (daily analysis)
periods_report = _fetch_report(
    "periods",
    lambda: client.get_utilization_periods(
        analysis_type="daily",
        user_timezone="America/Chicago",
        include_users=True,
        csv_export=False,  # JSON format
    ),
)

# User utilization report
user_report = _fetch_report(
    "user",
    lambda: client.get_user_utilization_report(
        csv_export=False  # JSON format
    ),
)

# Session utilization report
session_report = _fetch_report(
    "session",
    lambda: client.get_session_utilization(
        csv_export=False  # JSON format
    ),
)

# Only claim overall success when every request returned something.
if all(
    report is not None for report in (periods_report, user_report, session_report)
):
    print("Reports fetched successfully!")

print(
    f"Periods report periods: {_report_value(periods_report, 'summary_metadata', 'total_periods')}"
)
print(
    f"User report users: {_report_value(user_report, 'report_metadata', 'total_users')}"
)
print(
    f"Session report sessions: {_report_value(session_report, 'overall_summary', 'total_sessions')}"
)

# Dump the raw periods report for inspection.
print(periods_report)

Helper functions for data extraction

def extract_periods_data(
    periods_report: Union[dict, str, None],
) -> tuple[Optional[pd.DataFrame], Optional[dict]]:
    """Extract the system time series and per-node series from a periods report.

    Args:
        periods_report: Periods report. Only a dict containing a
            ``"time_series"`` mapping can be processed; ``None``, an empty
            dict or a string yields ``(None, None)``.

    Returns:
        ``(df, node_data)`` where ``df`` is a DataFrame built from
        ``time_series["system"]`` and ``node_data`` is ``time_series["nodes"]``
        (``{}`` when absent). Returns ``(None, None)`` when there is no usable
        system time series.
    """
    # A string report must be rejected explicitly: `"time_series" in <str>` is
    # a substring test, and indexing the string afterwards raises TypeError.
    if not isinstance(periods_report, dict):
        return None, None

    time_series = periods_report.get("time_series")
    if not isinstance(time_series, dict):
        return None, None

    system_data = time_series.get("system", [])
    if not system_data:
        return None, None

    return pd.DataFrame(system_data), time_series.get("nodes", {})


def extract_user_data(user_report: Union[dict, str, None]) -> Optional[pd.DataFrame]:
    """Build a per-user statistics table from a user utilization report.

    Args:
        user_report: User report. Only a dict with a non-empty
            ``"user_utilization"`` mapping can be processed.

    Returns:
        A DataFrame with one row per entry of ``user_utilization`` and one
        column per statistic listed below (missing values fall back to the
        listed default), or ``None`` when no user data is available.
    """
    # A string report must be rejected explicitly: `in` on a string is a
    # substring test, and indexing the string afterwards raises TypeError.
    if not isinstance(user_report, dict):
        return None

    user_utilization = user_report.get("user_utilization")
    if not isinstance(user_utilization, dict) or not user_utilization:
        return None

    # Column name -> value used when a user entry lacks that field.
    field_defaults = {
        "author": "Unknown",
        "total_workflows": 0,
        "completed_workflows": 0,
        "failed_workflows": 0,
        "cancelled_workflows": 0,
        "total_runtime_hours": 0,
        "completion_rate_percent": 0,
        "average_workflow_duration_hours": 0,
    }

    rows = [
        {field: user_info.get(field, default) for field, default in field_defaults.items()}
        for user_info in user_utilization.values()
    ]
    return pd.DataFrame(rows)


def extract_session_data(
    session_report: Union[dict, str, None],
) -> tuple[Optional[pd.DataFrame], Optional[dict]]:
    """Extract the per-session table and the overall summary from a session report.

    Args:
        session_report: Session report. Only a non-empty dict can be
            processed; ``None``, an empty dict or a string yields
            ``(None, None)``.

    Returns:
        ``(df, summary)``. ``summary`` is the report's ``"overall_summary"``
        (``{}`` when absent). ``df`` has one row per entry of
        ``"session_details"`` with the columns listed below, or is ``None``
        when the report contains no session details.
    """
    # A string report has no `.get`; reject it instead of raising AttributeError.
    if not session_report or not isinstance(session_report, dict):
        return None, None

    sessions = session_report.get("session_details", [])
    summary = session_report.get("overall_summary", {})
    if not sessions:
        return None, summary

    # Column name -> value used when a session entry lacks that field.
    field_defaults = {
        "session_name": "Unknown",
        "session_type": "unknown",
        "duration_hours": 0,
        "active_time_hours": 0,
        "system_utilization_percent": 0,
        "total_experiments": 0,
        "start_time": "",
        "end_time": "",
    }

    rows = [
        {field: session.get(field, default) for field, default in field_defaults.items()}
        for session in sessions
    ]
    return pd.DataFrame(rows), summary

Utilization Periods Report Visualizations

def plot_periods_analysis(periods_report: Optional[Union[dict, str]]) -> None:  # noqa: C901, PLR0915
    """Draw a 2x2 figure for the periods report and print summary statistics.

    Panels: system utilization per period (line), experiments per period
    (bar), active vs idle runtime per period (stacked bar) and utilization per
    node (one line per node). A panel whose input column is missing shows a
    centered notice instead of raising ``KeyError``.

    Args:
        periods_report: Periods report. ``None``/empty prints a message and
            returns; a non-dict value (for example a CSV string) cannot be
            plotted and is also reported and skipped.
    """
    if not periods_report:
        print("No periods report data available")
        return
    if not isinstance(periods_report, dict):
        print("Periods report is not a JSON dictionary - cannot plot it")
        return

    # Extract data
    df, node_data = extract_periods_data(periods_report)
    if df is None or df.empty:
        print("No time series data found in periods report")
        return

    def show_placeholder(ax, message: str) -> None:
        """Center a notice inside *ax* when the panel has nothing to draw."""
        ax.text(0.5, 0.5, message, ha="center", va="center", transform=ax.transAxes)

    has_utilization = "utilization" in df.columns
    has_experiments = "experiments" in df.columns
    periods = range(len(df))

    # Create subplot layout
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle("MADSci Utilization Periods Analysis", fontsize=16, fontweight="bold")

    # 1. System utilization over time (line plot)
    ax1 = axes[0, 0]
    if has_utilization:
        ax1.plot(
            periods,
            df["utilization"],
            marker="o",
            linewidth=2,
            markersize=6,
            color="steelblue",
        )
        ax1.set_xlabel("Period")
        ax1.set_ylabel("Utilization (%)")
        ax1.grid(True, alpha=0.3)

        if "period_display" in df.columns:
            # Ceiling division: guarantees at most 10 tick labels. Floor
            # division would allow up to 19 (e.g. 19 periods -> step 1).
            step = max(1, -(-len(df) // 10))
            tick_positions = list(range(0, len(df), step))
            ax1.set_xticks(tick_positions)
            ax1.set_xticklabels(
                df["period_display"].iloc[tick_positions].tolist(),
                rotation=45,
                ha="right",
            )
    else:
        show_placeholder(ax1, "Utilization data not available")
    ax1.set_title("System Utilization Over Time", fontweight="bold")

    # 2. Experiments per period (bar chart)
    ax2 = axes[0, 1]
    if has_experiments:
        bars = ax2.bar(periods, df["experiments"], alpha=0.7, color="orange")
        ax2.set_xlabel("Period")
        ax2.set_ylabel("Number of Experiments")

        # Label only non-empty bars (a NaN height also fails `> 0`).
        for bar in bars:
            height = bar.get_height()
            if height > 0:
                ax2.text(
                    bar.get_x() + bar.get_width() / 2.0,
                    height + 0.1,
                    f"{int(height)}",
                    ha="center",
                    va="bottom",
                )
    else:
        show_placeholder(ax2, "Experiment data not available")
    ax2.set_title("Experiments per Period", fontweight="bold")

    # 3. Runtime breakdown (stacked bar)
    ax3 = axes[1, 0]
    if "runtime_hours" in df.columns and "active_time_hours" in df.columns:
        active_time = df["active_time_hours"].fillna(0)
        idle_time = df["runtime_hours"].fillna(0) - active_time

        ax3.bar(periods, active_time, label="Active Time", alpha=0.8, color="green")
        ax3.bar(
            periods,
            idle_time,
            bottom=active_time,
            label="Idle Time",
            alpha=0.6,
            color="gray",
        )
        ax3.set_xlabel("Period")
        ax3.set_ylabel("Hours")
        ax3.legend()
    else:
        show_placeholder(ax3, "Runtime breakdown data not available")
    ax3.set_title("Runtime Breakdown per Period", fontweight="bold")

    # 4. Node utilization (multi-line plot)
    ax4 = axes[1, 1]
    lines_drawn = 0
    if node_data:
        colors = plt.cm.tab10(np.linspace(0, 1, len(node_data)))
        node_summaries = periods_report.get("node_summary", {})

        for color, (node_id, node_time_series) in zip(colors, node_data.items()):
            if not node_time_series:
                continue
            node_df = pd.DataFrame(node_time_series)
            if "utilization" not in node_df.columns:
                continue

            # Prefer the display name; fall back to the tail of the node id.
            node_name = node_summaries.get(node_id, {}).get(
                "display_name", f"Node {str(node_id)[-8:]}"
            )
            ax4.plot(
                range(len(node_df)),
                node_df["utilization"],
                marker="o",
                linewidth=2,
                markersize=4,
                color=color,
                label=node_name,
            )
            lines_drawn += 1

    if lines_drawn:
        ax4.set_xlabel("Period")
        ax4.set_ylabel("Utilization (%)")
        # Only request a legend when at least one labelled line exists.
        ax4.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
        ax4.grid(True, alpha=0.3)
    else:
        show_placeholder(ax4, "No node data available")
    ax4.set_title("Node Utilization Over Time", fontweight="bold")

    plt.tight_layout()
    plt.show()

    # Print summary statistics (only for columns that are present)
    print("Periods Report Summary:")
    print(f"Total Periods: {len(df)}")
    if has_utilization:
        print(f"Average Utilization: {df['utilization'].mean():.1f}%")
        print(f"Peak Utilization: {df['utilization'].max():.1f}%")
    if has_experiments:
        print(f"Total Experiments: {df['experiments'].sum()}")


# Render the periods charts only when a periods report is available.
if not periods_report:
    print("Skipping periods analysis - no data available")
else:
    plot_periods_analysis(periods_report)

User Utilization Report Visualizations

def plot_user_analysis(user_report: Optional[Union[dict, str]]) -> None:  # noqa: PLR0915
    """Draw a 2x2 figure for the user report and print summary statistics.

    Panels: total workflows per user (horizontal bar), completion rate per
    user (bar with a 100% target line), workflow status per user (stacked
    bar), and either a runtime-vs-workflows scatter (several users) or a text
    summary card (single user).

    Args:
        user_report: User report. ``None``/empty prints a message and returns;
            a non-dict value (for example a CSV string) cannot be plotted and
            is also reported and skipped.
    """
    if not user_report:
        print("No user report data available")
        return
    if not isinstance(user_report, dict):
        print("User report is not a JSON dictionary - cannot plot it")
        return

    df = extract_user_data(user_report)
    if df is None or df.empty:
        print("No user data found in user report")
        return

    positions = range(len(df))
    authors = df["author"]

    # Create subplot layout
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle("MADSci User Utilization Analysis", fontsize=16, fontweight="bold")

    # 1. Total workflows by user (horizontal bar)
    ax1 = axes[0, 0]
    bars = ax1.barh(positions, df["total_workflows"], alpha=0.7, color="skyblue")
    ax1.set_yticks(positions)
    ax1.set_yticklabels(authors)
    ax1.set_xlabel("Total Workflows")
    ax1.set_title("Total Workflows by User", fontweight="bold")

    # Add value labels to non-empty bars
    for bar in bars:
        width = bar.get_width()
        if width > 0:
            ax1.text(
                width + 0.1,
                bar.get_y() + bar.get_height() / 2,
                f"{int(width)}",
                ha="left",
                va="center",
            )

    # 2. Completion rate by user (bar chart)
    ax2 = axes[0, 1]
    bars = ax2.bar(positions, df["completion_rate_percent"], alpha=0.7, color="green")
    ax2.set_xticks(positions)
    ax2.set_xticklabels(authors, rotation=45, ha="right")
    ax2.set_ylabel("Completion Rate (%)")
    ax2.set_title("Completion Rate by User", fontweight="bold")
    # Leave headroom above 100 so the target line is not drawn on the frame
    # and the "100.0%" labels (placed at height + 2) stay inside the axes.
    ax2.set_ylim(0, 110)
    ax2.axhline(y=100, color="red", linestyle="--", alpha=0.7, label="Target (100%)")
    ax2.legend()

    # Add percentage labels
    for bar in bars:
        height = bar.get_height()
        ax2.text(
            bar.get_x() + bar.get_width() / 2.0,
            height + 2,
            f"{height:.1f}%",
            ha="center",
            va="bottom",
        )

    # 3. Workflow status breakdown (stacked bar); df is known to be non-empty here
    ax3 = axes[1, 0]
    bar_width = 0.6
    completed = df["completed_workflows"]
    failed = df["failed_workflows"]
    cancelled = df["cancelled_workflows"]

    ax3.bar(positions, completed, bar_width, label="Completed", color="green", alpha=0.8)
    ax3.bar(
        positions,
        failed,
        bar_width,
        bottom=completed,
        label="Failed",
        color="red",
        alpha=0.8,
    )
    ax3.bar(
        positions,
        cancelled,
        bar_width,
        bottom=completed + failed,
        label="Cancelled",
        color="orange",
        alpha=0.8,
    )
    ax3.set_xticks(positions)
    ax3.set_xticklabels(authors, rotation=45, ha="right")
    ax3.set_ylabel("Number of Workflows")
    ax3.set_title("Workflow Status by User", fontweight="bold")
    ax3.legend()

    # 4. Runtime vs Workflows (scatter or summary)
    ax4 = axes[1, 1]
    if len(df) > 1:
        # Scatter plot for multiple users
        ax4.scatter(
            df["total_workflows"],
            df["total_runtime_hours"],
            s=100,
            alpha=0.7,
            color="purple",
        )

        # Label every point with its author
        for author, workflows, runtime in zip(
            authors, df["total_workflows"], df["total_runtime_hours"]
        ):
            ax4.annotate(
                author,
                (workflows, runtime),
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=9,
            )

        ax4.set_xlabel("Total Workflows")
        ax4.set_ylabel("Total Runtime (hours)")
        ax4.set_title("Runtime vs Workflows", fontweight="bold")
        ax4.grid(True, alpha=0.3)
    else:
        # A scatter of one point is uninformative; show a summary card instead
        ax4.axis("off")
        user = df.iloc[0]
        summary_text = f"""
        User: {user["author"]}

        Total Workflows: {user["total_workflows"]}
        Completed: {user["completed_workflows"]}
        Failed: {user["failed_workflows"]}
        Cancelled: {user["cancelled_workflows"]}

        Total Runtime: {user["total_runtime_hours"]:.2f}h
        Avg Duration: {user["average_workflow_duration_hours"]:.3f}h
        Completion Rate: {user["completion_rate_percent"]:.1f}%
        """
        ax4.text(
            0.1,
            0.9,
            summary_text,
            fontsize=12,
            verticalalignment="top",
            bbox={"boxstyle": "round,pad=0.5", "facecolor": "lightgreen", "alpha": 0.7},
            transform=ax4.transAxes,
        )
        ax4.set_title("User Summary", fontweight="bold")

    plt.tight_layout()
    plt.show()

    # Print summary
    total_workflows = df["total_workflows"].sum()
    print("User Report Summary:")
    print(f"Total Users: {len(df)}")
    print(f"Total Workflows: {total_workflows}")
    if total_workflows > 0:  # avoid dividing by zero when nobody ran a workflow
        completed_total = df["completed_workflows"].sum()
        print(
            f"Overall Completion Rate: {(completed_total / total_workflows * 100):.1f}%"
        )


# Render the user charts only when a user report is available.
if not user_report:
    print("Skipping user analysis - no data available")
else:
    plot_user_analysis(user_report)

Session Utilization Report Visualizations

def plot_session_analysis(session_report: Optional[Union[dict, str]]) -> None:  # noqa: PLR0915
    """Draw a 2x2 figure for the session report and print the overall summary.

    Panels: session duration (horizontal bar), utilization per session (bar),
    experiment share per session (pie) and active vs idle time per session
    (stacked bar). When the report has no session details, the axes are
    hidden and a single notice is shown instead.

    Args:
        session_report: Session report. ``None``/empty prints a message and
            returns; a non-dict value (for example a CSV string) cannot be
            plotted and is also reported and skipped.
    """
    if not session_report:
        print("No session report data available")
        return
    if not isinstance(session_report, dict):
        # The extractor calls `.get` on the report, which a string lacks.
        print("Session report is not a JSON dictionary - cannot plot it")
        return

    df, summary = extract_session_data(session_report)

    # Create subplot layout
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle("MADSci Session Utilization Analysis", fontsize=16, fontweight="bold")

    if df is not None and not df.empty:
        positions = range(len(df))
        # Short "S1", "S2", ... labels; full session names are too long for x ticks.
        short_labels = [f"S{i + 1}" for i in positions]

        # 1. Session duration comparison (horizontal bar)
        ax1 = axes[0, 0]
        bars = ax1.barh(positions, df["duration_hours"], alpha=0.7, color="coral")
        ax1.set_yticks(positions)
        ax1.set_yticklabels(df["session_name"])
        ax1.set_xlabel("Duration (hours)")
        ax1.set_title("Session Duration Comparison", fontweight="bold")

        # Add value labels to non-empty bars
        for bar in bars:
            width = bar.get_width()
            if width > 0:
                ax1.text(
                    width + 0.01,
                    bar.get_y() + bar.get_height() / 2,
                    f"{width:.2f}h",
                    ha="left",
                    va="center",
                )

        # 2. Utilization by session (bar chart)
        ax2 = axes[0, 1]
        bars = ax2.bar(
            positions, df["system_utilization_percent"], alpha=0.7, color="gold"
        )
        ax2.set_xticks(positions)
        ax2.set_xticklabels(short_labels, rotation=45)
        ax2.set_ylabel("Utilization (%)")
        ax2.set_title("Utilization by Session", fontweight="bold")

        # Add percentage labels
        for bar in bars:
            height = bar.get_height()
            ax2.text(
                bar.get_x() + bar.get_width() / 2.0,
                height + 0.5,
                f"{height:.1f}%",
                ha="center",
                va="bottom",
            )

        # 3. Experiments distribution (pie chart)
        ax3 = axes[1, 0]
        experiment_counts = df["total_experiments"]
        if experiment_counts.sum() > 0:
            wedges, _texts, _autotexts = ax3.pie(
                experiment_counts,
                labels=[
                    f"{name}\n({count} exp)"
                    for name, count in zip(df["session_name"], experiment_counts)
                ],
                autopct="%1.1f%%",
                startangle=90,
            )

            # Transparency is applied per wedge rather than passed to pie()
            for wedge in wedges:
                wedge.set_alpha(0.8)
        else:
            # Position in axes coordinates, like the other placeholders, so the
            # notice stays centered regardless of the data limits.
            ax3.text(
                0.5,
                0.5,
                "No experiments found",
                ha="center",
                va="center",
                transform=ax3.transAxes,
            )
        ax3.set_title("Experiment Distribution by Session", fontweight="bold")

        # 4. Active vs Idle time (stacked bar)
        ax4 = axes[1, 1]
        active_time = df["active_time_hours"]
        idle_time = df["duration_hours"] - df["active_time_hours"]

        ax4.bar(positions, active_time, label="Active Time", alpha=0.8, color="green")
        ax4.bar(
            positions,
            idle_time,
            bottom=active_time,
            label="Idle Time",
            alpha=0.6,
            color="gray",
        )

        ax4.set_xticks(positions)
        ax4.set_xticklabels(short_labels, rotation=45)
        ax4.set_ylabel("Hours")
        ax4.set_title("Active vs Idle Time by Session", fontweight="bold")
        ax4.legend()

    else:
        # No session data - hide all axes and show a single notice
        for ax in axes.flat:
            ax.axis("off")

        axes[0, 0].text(
            0.5,
            0.5,
            "No session details available",
            ha="center",
            va="center",
            transform=axes[0, 0].transAxes,
        )
        axes[0, 0].set_title("Session Analysis", fontweight="bold")

    plt.tight_layout()
    plt.show()

    # Print summary
    if summary:
        print("Session Report Summary:")
        print(f"Total Sessions: {summary.get('total_sessions', 0)}")
        print(f"Total Runtime: {summary.get('total_system_runtime_hours', 0):.1f}h")
        print(
            f"Average Utilization: {summary.get('average_system_utilization_percent', 0):.1f}%"
        )
        print(f"Total Experiments: {summary.get('total_experiments', 0)}")
        print(f"Nodes Tracked: {summary.get('nodes_tracked', 0)}")


# Render the session charts only when a session report is available.
if not session_report:
    print("Skipping session analysis - no data available")
else:
    plot_session_analysis(session_report)

Summary Dashboard

def create_summary_dashboard() -> None:
    """Render a one-row, three-card text dashboard of headline metrics.

    Reads the module-level ``periods_report``, ``user_report`` and
    ``session_report`` variables. Each card is a text box on a hidden axis;
    any metric that is missing is shown as ``N/A``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    fig.suptitle("MADSci System Overview Dashboard", fontsize=16, fontweight="bold")

    # Pull the metrics section out of each report, defaulting to an empty dict.
    periods_metrics = (
        periods_report["key_metrics"]
        if periods_report and "key_metrics" in periods_report
        else {}
    )
    user_metrics = (
        user_report["system_summary"]
        if user_report and "system_summary" in user_report
        else {}
    )
    session_metrics = (
        session_report["overall_summary"]
        if session_report and "overall_summary" in session_report
        else {}
    )

    # Card bodies
    periods_text = f"""
    SYSTEM UTILIZATION

    Average Utilization: {periods_metrics.get("average_utilization", "N/A")}%
    Peak Utilization: {periods_metrics.get("peak_utilization", "N/A")}%
    Peak Period: {periods_metrics.get("peak_period", "N/A")}

    Total Experiments: {periods_metrics.get("total_experiments", "N/A")}
    Total Runtime: {periods_metrics.get("total_runtime_hours", "N/A")}h
    Active Periods: {periods_metrics.get("active_periods", "N/A")}
    """
    user_text = f"""
    USER ACTIVITY

    Total Users: {user_report.get("report_metadata", {}).get("total_users", "N/A") if user_report else "N/A"}
    Total Workflows: {user_metrics.get("total_workflows", "N/A")}
    Completion Rate: {user_metrics.get("completion_rate_percent", "N/A")}%

    Total Runtime: {user_metrics.get("total_runtime_hours", "N/A")}h
    Avg Duration: {user_metrics.get("average_workflow_duration_hours", "N/A")}h
    Author Attribution: {user_metrics.get("author_attribution_rate_percent", "N/A")}%
    """
    session_text = f"""
    SESSION ACTIVITY

    Total Sessions: {session_metrics.get("total_sessions", "N/A")}
    System Runtime: {session_metrics.get("total_system_runtime_hours", "N/A")}h
    Avg Utilization: {session_metrics.get("average_system_utilization_percent", "N/A")}%

    Total Experiments: {session_metrics.get("total_experiments", "N/A")}
    Active Time: {session_metrics.get("total_active_time_hours", "N/A")}h
    Nodes Tracked: {session_metrics.get("nodes_tracked", "N/A")}
    """

    # (card title, card body, box colour), in left-to-right order
    cards = (
        ("Periods Analysis", periods_text, "lightblue"),
        ("User Analysis", user_text, "lightgreen"),
        ("Session Analysis", session_text, "lightyellow"),
    )
    for ax, (title, body, box_color) in zip(axes, cards):
        ax.axis("off")
        ax.text(
            0.1,
            0.9,
            body,
            fontsize=11,
            verticalalignment="top",
            bbox={"boxstyle": "round,pad=0.5", "facecolor": box_color, "alpha": 0.7},
            transform=ax.transAxes,
        )
        ax.set_title(title, fontweight="bold", pad=20)

    plt.tight_layout()
    plt.show()


# Final cell: draw the overview dashboard, then confirm completion.
print("Creating summary dashboard...")
create_summary_dashboard()

print(
    "\nVisualization complete! All charts have been generated.",
    "Note: Charts are displayed inline in the notebook.",
    sep="\n",
)