import pandas as pd

# First look at the raw workbook: read it from its fixed location on the C: drive
cancer_cases = pd.read_excel(io=r"C:\Cancer_Cases_deaths_2025.xlsx")

# Bare expression: a notebook renders the table here; a plain script ignores it
cancer_cases

# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Plot defaults for every figure created below: 8x5 inch canvas with a grid on all axes
plt.rcParams.update({
    'figure.figsize': (8, 5),
    'axes.grid': True,
})

# Domain: 800 evenly spaced samples covering [-4*pi, 4*pi]
x = np.linspace(-4 * np.pi, 4 * np.pi, num=800)

# The two curves to compare
f = np.sin(x)                              # Function 1: plain sine
g = np.exp(-0.1 * x**2) * np.cos(2 * x)    # Function 2: cosine damped by a Gaussian envelope

# Overlay both curves on a single set of axes; each entry carries the
# per-curve line options, while the line width is shared.
plt.figure()
for curve, line_opts in (
    (f, {'linestyle': '-', 'label': 'f(x) = sin(x)'}),                               # solid line
    (g, {'linestyle': '--', 'marker': '', 'label': 'g(x) = e^{-0.1x^2} cos(2x)'}),  # dashed line
):
    plt.plot(x, curve, linewidth=2, **line_opts)

# Annotate and render
plt.title('Two Functions on the Same Graph')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

# One row, two panels; the y-axes are independent so each curve gets its own scale
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5), sharey=False)
left_panel, right_panel = axes

# Left panel: f(x), labelled on both axes
left_panel.plot(x, f, linewidth=2)
left_panel.set(title='f(x) = sin(x)', xlabel='x', ylabel='y')
left_panel.grid(True)

# Right panel: g(x), dashed; only the x-axis is labelled
right_panel.plot(x, g, linestyle='--', linewidth=2)
right_panel.set(title='g(x) = e^{-0.1x^2} cos(2x)', xlabel='x')
right_panel.grid(True)

# Tighten spacing so titles and labels do not collide, then render
plt.tight_layout()
plt.show()

# Locate the workbook: try each candidate path in order and keep the first one
# pandas can actually open. Edit this list if your Excel file is elsewhere.
possible_paths = [
    r"C:\Cancer_Cases_deaths_2025.xlsx",             # absolute path on the local C: drive
    '/mnt/data/Cancer_Cases_deaths_2025.xlsx',       # fallback path (for demo environment)
]
excel_path = None
last_error = None  # most recent probe failure, kept for diagnostics
for p in possible_paths:
    try:
        # Probe only: the context manager closes the workbook handle again,
        # so the check does not leave the file open (or locked on Windows).
        with pd.ExcelFile(p):
            pass
    except Exception as exc:
        # Best-effort probe: any failure just means "try the next candidate",
        # but remember why so the final error is not silent about the cause.
        last_error = exc
        continue
    excel_path = p
    break

if excel_path is None:
    # Chain the last probe failure so the traceback shows what actually went wrong
    raise FileNotFoundError("Excel file not found. Place 'Cancer_Cases_deaths_2025.xlsx' next to this notebook or update excel_path.") from last_error

# Load the workbook found above (default sheet)
df = pd.read_excel(excel_path)

# Drop columns whose name starts with "Unnamed" (e.g. an all-NaN "Unnamed: 0"
# column). Headers are compared through str() so a non-text header cell
# (such as a number) cannot break the .str accessor or the boolean inversion.
unnamed_mask = df.columns.astype(str).str.contains('^Unnamed')
df = df.loc[:, ~unnamed_mask].copy()

# Standardize column names: trim surrounding whitespace on text headers and
# leave any non-text header exactly as it is
df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]

# Convert the "Estimated ..." columns to numbers; cells that are not numeric
# (for example a "-" separator row) become NaN instead of raising
for col in df.columns:
    if isinstance(col, str) and col.lower().startswith('estimated'):
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Show a preview (rendered by a notebook; ignored by a plain script)
df.head()

# Pie chart: share of estimated new cases per cancer type.
label_col = 'Common Type of Cancers'
# First column whose (text) name mentions the new-case estimate
cases_col = next(
    (c for c in df.columns if isinstance(c, str) and 'Estimated New Cases' in c),
    None,
)

if cases_col is None:
    raise ValueError("Could not find a column containing 'Estimated New Cases' in its name.")

# The sheet ends with an all-sites summary row. It is not a cancer type, and
# leaving it in would give the total its own slice (more than half the pie)
# and shrink every real share accordingly.
aggregate_label = 'Cancer of Any Site'

# Drop rows with missing labels/values, then the summary row
pie_df = df[[label_col, cases_col]].dropna()
pie_df = pie_df[pie_df[label_col].astype(str).str.strip() != aggregate_label]
labels = pie_df[label_col].astype(str).values
values = pie_df[cases_col].astype(float).values

plt.figure()
# startangle=90 starts the first wedge at the top of the circle
plt.pie(values, labels=labels, autopct='%1.1f%%', startangle=90)
plt.title('Estimated New Cases (2025) – Share by Cancer Type')
plt.tight_layout()
plt.show()

# Grouped bar chart: estimated new cases next to estimated deaths, per cancer type.
# First column whose (text) name mentions the death estimate
death_col = next(
    (c for c in df.columns if isinstance(c, str) and 'Estimated Deaths' in c),
    None,
)
if death_col is None:
    raise ValueError("Could not find a column containing 'Estimated Deaths' in its name.")

# The all-sites summary row is not a cancer type; plotted as a bar it would
# dwarf every real category and flatten the comparison, so leave it out.
aggregate_label = 'Cancer of Any Site'

# Keep complete rows only, drop the summary row, largest case count first
bar_df = df[[label_col, cases_col, death_col]].dropna()
bar_df = bar_df[bar_df[label_col].astype(str).str.strip() != aggregate_label]
bar_df = bar_df.sort_values(cases_col, ascending=False)

# Plot grouped bars: one slot per cancer type, the two bars offset by half a
# bar width to either side of the slot centre
indices = np.arange(len(bar_df))
width = 0.4

plt.figure(figsize=(12,6))
plt.bar(indices - width/2, bar_df[cases_col].values, width, label='Estimated New Cases 2025')
plt.bar(indices + width/2, bar_df[death_col].values, width, label='Estimated Deaths 2025')

# Slanted, right-aligned tick labels keep the long cancer names readable
plt.xticks(indices, bar_df[label_col].astype(str).values, rotation=45, ha='right')
plt.ylabel('Count')
plt.title('Estimated New Cases vs Deaths by Cancer Type (2025)')
plt.legend()
plt.tight_layout()
plt.show()

	Unnamed: 0	Rank	Common Type of Cancers	Estimated New Cases 2025	Estimated Deaths 2025
0	NaN	1.0	Breast Cancer (Female)	316950	42170
1	NaN	2.0	Prostate Cancer	313780	35770
2	NaN	3.0	Lung and Bronchus Cancer	226650	124730
3	NaN	4.0	Colorectal Cancer	154270	52900
4	NaN	5.0	Melanoma of the Skin	104960	8430
5	NaN	6.0	Bladder Cancer	84870	17420
6	NaN	7.0	Kidney and Renal Pelvis Cancer	80980	14510
7	NaN	8.0	Non-Hodgkin Lymphoma	80350	19390
8	NaN	9.0	Uterine Cancer	69120	13860
9	NaN	10.0	Pancreatic Cancer	67440	51980
10	NaN	NaN	-	-	-
11	NaN	NaN	Cancer of Any Site	2041910	618120

	Rank	Common Type of Cancers	Estimated New Cases 2025	Estimated Deaths 2025
0	1.0	Breast Cancer (Female)	316950.0	42170.0
1	2.0	Prostate Cancer	313780.0	35770.0
2	3.0	Lung and Bronchus Cancer	226650.0	124730.0
3	4.0	Colorectal Cancer	154270.0	52900.0
4	5.0	Melanoma of the Skin	104960.0	8430.0