Files
hr_visualize/visualize_hr_data.py

217 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""
Visualize HR data from two CSV files with max value alignment
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
import seaborn as sns
def _dataset_label(number: int, path: str) -> str:
    """Return the two-line box-plot tick label for dataset *number*."""
    # Local import: the module's import block does not bring in pathlib.
    from pathlib import Path

    # A trailing "_HR" tag is dropped so the default second file keeps its
    # historical label "history_20250610_165414".
    recording = Path(path).stem.removesuffix("_HR")
    return f"Dataset {number}\n({recording})"


def _load_hr_frame(path: str, column: str, number: int) -> pd.DataFrame:
    """Read one CSV, print its shape/columns/HR range and return the frame.

    Raises:
        KeyError: if *column* is not present in the file.
        ValueError: if *column* holds no non-missing values, because the
            max-based alignment would be undefined.
    """
    print(f"Loading {path}...")
    frame = pd.read_csv(path)
    print(f"Dataset {number} shape: {frame.shape}")
    print(f"Dataset {number} columns: {frame.columns.tolist()}")

    if column not in frame.columns:
        raise KeyError(f"{path}: expected a heart-rate column named {column!r}")
    hr = frame[column]
    # Fail here with a clear message instead of deep inside idxmax().
    if not hr.notna().any():
        raise ValueError(f"{path}: column {column!r} contains no heart-rate values")

    print(f"Dataset {number} HR range: {hr.min():.1f} - {hr.max():.1f}")
    return frame


def _plot_aligned_series(ax, series) -> None:
    """Draw both full recordings on the shared, max-aligned time axis."""
    for frame, column, peak, name, fmt, color in series:
        ax.plot(
            frame["time_aligned"],
            frame[column],
            fmt,
            alpha=0.7,
            markersize=3,
            label=f"{name} (max: {peak:.1f})",
            color=color,
        )
    ax.axvline(
        x=0, color="black", linestyle="--", alpha=0.5, label="Max values aligned"
    )
    # Star markers sit at t = 0, where each recording's maximum was shifted to.
    for _, _, peak, _, _, color in series:
        ax.scatter([0], [peak], color=color, s=100, zorder=5, marker="*")
    ax.set_xlabel("Time (seconds, aligned to max)")
    ax.set_ylabel("Heart Rate (bpm)")
    ax.set_title("Heart Rate Over Time (Aligned at Max Values)")
    ax.legend()
    ax.grid(True, alpha=0.3)


def _plot_distributions(ax, series) -> None:
    """Draw overlaid density histograms with a dashed line at each maximum."""
    for frame, column, _, name, _, color in series:
        ax.hist(
            frame[column], bins=20, alpha=0.7, label=name, color=color, density=True
        )
    for number, (_, _, peak, _, _, color) in enumerate(series, start=1):
        ax.axvline(
            peak,
            color=color,
            linestyle="--",
            alpha=0.8,
            label=f"Max {number}: {peak:.1f}",
        )
    ax.set_xlabel("Heart Rate (bpm)")
    ax.set_ylabel("Density")
    ax.set_title("Heart Rate Distribution")
    ax.legend()
    ax.grid(True, alpha=0.3)


def _plot_boxes(ax, series, labels) -> None:
    """Draw side-by-side box plots of the two HR columns."""
    # boxplot() does not skip NaN: one missing sample turns the quartiles into
    # NaN and the box disappears, so missing values are dropped for this panel.
    samples = [frame[column].dropna().to_numpy() for frame, column, *_ in series]
    bp = ax.boxplot(samples, tick_labels=labels, patch_artist=True)
    for box, face in zip(bp["boxes"], ("lightblue", "lightcoral")):
        box.set_facecolor(face)
    ax.set_ylabel("Heart Rate (bpm)")
    ax.set_title("Heart Rate Distribution Comparison")
    ax.grid(True, alpha=0.3)


def _plot_zoom(ax, series, window) -> None:
    """Draw both recordings restricted to +/- *window* around the aligned max."""
    for frame, column, _, name, fmt, color in series:
        # between() is inclusive on both ends, i.e. -window <= t <= window.
        nearby = frame[frame["time_aligned"].between(-window, window)]
        if not nearby.empty:
            ax.plot(
                nearby["time_aligned"],
                nearby[column],
                fmt,
                alpha=0.8,
                markersize=4,
                label=name,
                color=color,
                linewidth=2,
            )
    ax.axvline(x=0, color="black", linestyle="--", alpha=0.5, label="Max alignment")
    for _, _, peak, _, _, color in series:
        ax.scatter(
            [0], [peak], color=color, s=150, zorder=5, marker="*", edgecolor="black"
        )
    ax.set_xlabel("Time (seconds, aligned to max)")
    ax.set_ylabel("Heart Rate (bpm)")
    ax.set_title(f"Zoomed View Around Max Values (±{window}s)")
    ax.legend()
    ax.grid(True, alpha=0.3)


def _print_dataset_stats(number, path, hr, max_hr, max_idx) -> None:
    """Print the summary-statistics paragraph for one dataset."""
    print(f"\nDataset {number} ({path}):")
    print(f" Records: {len(hr)}")
    print(f" Mean HR: {hr.mean():.1f} bpm")
    print(f" Median HR: {hr.median():.1f} bpm")
    print(f" Max HR: {max_hr:.1f} bpm (at original index {max_idx})")
    print(f" Std Dev: {hr.std():.1f} bpm")
    print(f" Range: {hr.min():.1f} - {hr.max():.1f} bpm")


def load_and_visualize_hr_data(
    file1: str = "2025-06-10_16_26.CSV",
    file2: str = "history_20250610_165414_HR.csv",
    output_file: str = "hr_data_visualization_aligned.png",
    window: int = 20,
    show: bool = True,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load two HR recordings, align them at their maxima, plot and summarize.

    Each recording is shifted so that its maximum heart rate falls at t = 0.
    A 2x2 figure (aligned time series, density histograms, box plots, zoomed
    overlay) is saved to *output_file*, and summary statistics are printed.

    The row index is used directly as the time axis in seconds.
    NOTE(review): this assumes one sample per second in both files and a
    default integer index -- confirm against the recording devices.

    Args:
        file1: CSV containing an ``hr`` column.
        file2: CSV containing an ``HR`` column.
        output_file: Path of the PNG written with ``dpi=300``.
        window: Half-width of the zoomed panel, in index steps around t = 0.
        show: When True (the default) call ``plt.show()`` after saving. When
            False the figure is closed instead, so the function can run
            unattended without accumulating open figures.

    Returns:
        ``(df1, df2)``: the loaded frames, each extended with the columns
        ``time_seconds`` (the index) and ``time_aligned`` (index minus the
        index of the maximum).

    Raises:
        KeyError: if an expected HR column is missing.
        ValueError: if an HR column has no non-missing values.
    """
    df1 = _load_hr_frame(file1, "hr", 1)
    print()
    df2 = _load_hr_frame(file2, "HR", 2)

    # Peak value and the index label at which it (first) occurs.
    max_hr1, max_idx1 = df1["hr"].max(), df1["hr"].idxmax()
    max_hr2, max_idx2 = df2["HR"].max(), df2["HR"].idxmax()
    print(f"\nDataset 1 max HR: {max_hr1:.1f} at index {max_idx1}")
    print(f"Dataset 2 max HR: {max_hr2:.1f} at index {max_idx2}")

    # Shift each recording so that its own maximum sits at time zero.
    for frame, peak_idx in ((df1, max_idx1), (df2, max_idx2)):
        frame["time_seconds"] = frame.index
        frame["time_aligned"] = frame["time_seconds"] - peak_idx

    print("\nAfter alignment:")
    for number, frame in ((1, df1), (2, df2)):
        aligned = frame["time_aligned"]
        print(
            f"Dataset {number} time range: "
            f"{aligned.min():.1f} to {aligned.max():.1f} seconds"
        )

    # (frame, HR column, peak value, legend name, line/marker format, color)
    series = (
        (df1, "hr", max_hr1, "Dataset 1", "o-", "blue"),
        (df2, "HR", max_hr2, "Dataset 2", "s-", "red"),
    )

    plt.style.use("seaborn-v0_8")
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle(
        "Heart Rate Data Visualization (Max Values Aligned)",
        fontsize=16,
        fontweight="bold",
    )
    _plot_aligned_series(axes[0, 0], series)
    _plot_distributions(axes[0, 1], series)
    _plot_boxes(
        axes[1, 0], series, [_dataset_label(1, file1), _dataset_label(2, file2)]
    )
    _plot_zoom(axes[1, 1], series, window)
    fig.tight_layout()

    fig.savefig(output_file, dpi=300, bbox_inches="tight")
    print(f"\nVisualization saved as: {output_file}")
    if show:
        plt.show()
    else:
        # Nothing else will display the figure; release it.
        plt.close(fig)

    print("\n" + "=" * 60)
    print("DETAILED STATISTICS (MAX VALUES ALIGNED)")
    print("=" * 60)
    _print_dataset_stats(1, file1, df1["hr"], max_hr1, max_idx1)
    _print_dataset_stats(2, file2, df2["HR"], max_hr2, max_idx2)
    print("\nAlignment Info:")
    print(f" Max HR difference: {abs(max_hr1 - max_hr2):.1f} bpm")
    print(
        f" Time shift applied: Dataset 1 shifted by -{max_idx1}s, "
        f"Dataset 2 shifted by -{max_idx2}s"
    )
    print("\n" + "=" * 60)
    return df1, df2
if __name__ == "__main__":
    # Script entry point: announce the run, execute the load/align/plot
    # pipeline with no arguments, and keep the two returned frames bound at
    # module level as df1 and df2.
    print("Starting HR data visualization with max value alignment...")
    df1, df2 = load_and_visualize_hr_data()
    print("Visualization complete!")