In [1]:
import pandas as pd
In [2]:
# Read the 2025 cancer statistics workbook (default sheet) into a DataFrame.
cancer_cases = pd.read_excel(io=r"C:\Cancer_Cases_deaths_2025.xlsx")
In [3]:
# Show the raw frame exactly as loaded, before any cleaning is applied.
cancer_cases
Out[3]:
| | Unnamed: 0 | Rank | Common Type of Cancers | Estimated New Cases 2025 | Estimated Deaths 2025 |
|---|---|---|---|---|---|
| 0 | NaN | 1.0 | Breast Cancer (Female) | 316950 | 42170 |
| 1 | NaN | 2.0 | Prostate Cancer | 313780 | 35770 |
| 2 | NaN | 3.0 | Lung and Bronchus Cancer | 226650 | 124730 |
| 3 | NaN | 4.0 | Colorectal Cancer | 154270 | 52900 |
| 4 | NaN | 5.0 | Melanoma of the Skin | 104960 | 8430 |
| 5 | NaN | 6.0 | Bladder Cancer | 84870 | 17420 |
| 6 | NaN | 7.0 | Kidney and Renal Pelvis Cancer | 80980 | 14510 |
| 7 | NaN | 8.0 | Non-Hodgkin Lymphoma | 80350 | 19390 |
| 8 | NaN | 9.0 | Uterine Cancer | 69120 | 13860 |
| 9 | NaN | 10.0 | Pancreatic Cancer | 67440 | 51980 |
| 10 | NaN | NaN | - | - | - |
| 11 | NaN | NaN | Cancer of Any Site | 2041910 | 618120 |
In [5]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Notebook-wide matplotlib defaults: 8x5 inch figures with grid lines enabled.
plt.rcParams.update({
    'figure.figsize': (8, 5),
    'axes.grid': True,
})
In [10]:
# Sample the domain [-4*pi, 4*pi] with 800 points so both curves look smooth.
x = np.linspace(-4*np.pi, 4*np.pi, 800)
f = np.sin(x)                          # Function 1: sine wave
g = np.exp(-0.1*x**2) * np.cos(2*x)    # Function 2: cosine scaled by a Gaussian factor

# Draw both curves on one set of axes, told apart by line style.
# The labels are wrapped in $...$ so matplotlib's mathtext typesets the
# exponent instead of printing the raw "e^{-0.1x^2}" characters.
fig, ax = plt.subplots()
ax.plot(x, f, linestyle='-', linewidth=2, label=r'$f(x) = \sin(x)$')                 # solid line
ax.plot(x, g, linestyle='--', linewidth=2, label=r'$g(x) = e^{-0.1x^2}\cos(2x)$')    # dashed line
ax.set_title('Two Functions on the Same Graph')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend()
plt.show()
In [11]:
# Two independent panels, one per function. sharey=False lets each panel
# pick its own y-range, so each panel also gets its own y-axis label.
fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharey=False)

# Left: f(x). Titles use $...$ so mathtext renders them as formulas.
axes[0].plot(x, f, linewidth=2)
axes[0].set_title(r'$f(x) = \sin(x)$')
axes[0].set_xlabel('x')
axes[0].set_ylabel('y')
axes[0].grid(True)

# Right: g(x), dashed to match its style in the combined plot.
axes[1].plot(x, g, linestyle='--', linewidth=2)
axes[1].set_title(r'$g(x) = e^{-0.1x^2}\cos(2x)$')
axes[1].set_xlabel('x')
axes[1].set_ylabel('y')
axes[1].grid(True)

fig.tight_layout()
plt.show()
In [13]:
# Candidate workbook locations, tried in order; the first one that exists wins.
# Add your own location to this list if the file lives elsewhere.
possible_paths = [
    r"C:\Cancer_Cases_deaths_2025.xlsx",         # absolute path at the root of C:
    '/mnt/data/Cancer_Cases_deaths_2025.xlsx',   # fallback path (for demo environment)
    'Cancer_Cases_deaths_2025.xlsx',             # current working directory (normally the notebook's folder)
]

excel_path = None
for p in possible_paths:
    try:
        # Open and parse in one step; the context manager closes the file
        # handle, and the workbook is not opened a second time afterwards.
        with pd.ExcelFile(p) as workbook:
            df = pd.read_excel(workbook)
    except FileNotFoundError:
        # Only a missing file moves on to the next candidate. Any other
        # failure (missing Excel engine, corrupt workbook, permissions) is
        # left to propagate so it is not misreported as "file not found".
        continue
    excel_path = p
    break

if excel_path is None:
    raise FileNotFoundError("Excel file not found. Place 'Cancer_Cases_deaths_2025.xlsx' next to this notebook or update possible_paths.")

# Standardize column names: force to str (labels may be non-strings) and strip spaces.
df.columns = [str(c).strip() for c in df.columns]

# Drop the placeholder "Unnamed: N" columns pandas creates for header-less columns.
df = df.loc[:, [not c.startswith('Unnamed') for c in df.columns]].copy()

# Convert the "Estimated ..." columns to numbers; non-numeric cells such as
# the '-' separator row become NaN instead of raising.
for col in df.columns:
    if col.lower().startswith('estimated'):
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Show a preview
df.head()
Out[13]:
| | Rank | Common Type of Cancers | Estimated New Cases 2025 | Estimated Deaths 2025 |
|---|---|---|---|---|
| 0 | 1.0 | Breast Cancer (Female) | 316950.0 | 42170.0 |
| 1 | 2.0 | Prostate Cancer | 313780.0 | 35770.0 |
| 2 | 3.0 | Lung and Bronchus Cancer | 226650.0 | 124730.0 |
| 3 | 4.0 | Colorectal Cancer | 154270.0 | 52900.0 |
| 4 | 5.0 | Melanoma of the Skin | 104960.0 | 8430.0 |
In [14]:
# Column holding the cancer-type names, and the first column whose name
# mentions estimated new cases (looked up by substring so the year can vary).
label_col = 'Common Type of Cancers'
cases_col = next((c for c in df.columns if 'Estimated New Cases' in c), None)
if cases_col is None:
    raise ValueError("Could not find a column containing 'Estimated New Cases' in its name.")

# The sheet ends with an aggregate "Cancer of Any Site" row. It is a total,
# not a cancer type, so drawing it as a slice next to its own components
# would distort every percentage. Exclude it before plotting.
is_total_row = df[label_col].astype(str).str.strip().str.casefold() == 'cancer of any site'

# Drop rows with missing labels/values (e.g. the '-' separator row).
pie_df = df.loc[~is_total_row, [label_col, cases_col]].dropna()
labels = pie_df[label_col].astype(str).values
values = pie_df[cases_col].astype(float).values

# Percentages are each type's share of the listed types only.
fig, ax = plt.subplots()
ax.pie(values, labels=labels, autopct='%1.1f%%', startangle=90)
ax.set_title('Estimated New Cases (2025) – Share by Cancer Type')
fig.tight_layout()
plt.show()
In [15]:
# First column whose name mentions estimated deaths (substring lookup).
death_col = next((c for c in df.columns if 'Estimated Deaths' in c), None)
if death_col is None:
    raise ValueError("Could not find a column containing 'Estimated Deaths' in its name.")

# Exclude the aggregate "Cancer of Any Site" row: it is the all-sites total,
# not a cancer type, and its bar would dwarf every individual type.
is_total_row = df[label_col].astype(str).str.strip().str.casefold() == 'cancer of any site'

# Keep complete rows only and order them by new cases, largest first.
bar_df = df.loc[~is_total_row, [label_col, cases_col, death_col]].dropna()
bar_df = bar_df.sort_values(cases_col, ascending=False)

# Plot grouped bars: each type gets two bars centered on its tick, offset by
# half a bar width to the left (cases) and right (deaths).
indices = np.arange(len(bar_df))
width = 0.4
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(indices - width/2, bar_df[cases_col].values, width, label='Estimated New Cases 2025')
ax.bar(indices + width/2, bar_df[death_col].values, width, label='Estimated Deaths 2025')
ax.set_xticks(indices)
ax.set_xticklabels(bar_df[label_col].astype(str).values, rotation=45, ha='right')
ax.set_ylabel('Count')
ax.set_title('Estimated New Cases vs Deaths by Cancer Type (2025)')
ax.legend()
fig.tight_layout()
plt.show()