Mouse Nuclei
The mouse nuclei dataset consists of a training set and a test set.
# Imports necessary to execute the code
import matplotlib.pyplot as plt
import numpy as np
from careamics import CAREamist
from careamics.config import create_n2v_configuration
from careamics.utils.metrics import scale_invariant_psnr
from careamics_portfolio import PortfolioManager
from PIL import Image
Import the dataset¶
The dataset can be directly downloaded using the careamics-portfolio
package, which uses pooch
to download the data.
# fetch the noisy training data through the portfolio helper
portfolio = PortfolioManager()
files = sorted(portfolio.denoiseg.MouseNuclei_n20.download())

# the second archive entry holds the noisy training split
train_data = np.load(files[1])["X_train"]
print(f"Train data shape: {train_data.shape}")
Visualize data¶
# example images to display (kept as a list so the panel code is written once
# instead of being copy-pasted four times)
indices = [34, 293, 571, 783]

fig, ax = plt.subplots(2, 2, figsize=(8, 8))
for axis, idx in zip(ax.flat, indices):
    axis.imshow(train_data[idx], cmap="gray")
    axis.set_title(f"Image {idx}")
    # hide tick marks — only the pixel data matters here
    axis.set_xticks([])
    axis.set_yticks([])
plt.show()
Train with CAREamics¶
The easiest way to use CAREamics is to create a configuration and a CAREamist.
Create configuration¶
The configuration can be built from scratch, giving the user full control over the various parameters available in CAREamics. However, a straightforward way to create a configuration for a particular algorithm is to use one of the convenience functions.
# N2V training settings for this dataset:
# - axes "SYX": stack of 2D single-channel images (sample, Y, X)
# - 64x64 patches, batches of 16, 10 epochs
n2v_settings = dict(
    experiment_name="mouse_nuclei_n2v",
    data_type="array",
    axes="SYX",
    patch_size=(64, 64),
    batch_size=16,
    num_epochs=10,
)
config = create_n2v_configuration(**n2v_settings)
print(config)
{'algorithm_config': {'algorithm': 'n2v', 'loss': 'n2v', 'lr_scheduler': {'name': 'ReduceLROnPlateau', 'parameters': {}}, 'model': {'architecture': 'UNet', 'conv_dims': 2, 'depth': 2, 'final_activation': 'None', 'in_channels': 1, 'independent_channels': True, 'n2v2': False, 'num_channels_init': 32, 'num_classes': 1}, 'optimizer': {'name': 'Adam', 'parameters': {'lr': 0.0001}}}, 'data_config': {'axes': 'SYX', 'batch_size': 16, 'data_type': 'array', 'patch_size': [64, 64], 'transforms': [{'flip_x': True, 'flip_y': True, 'name': 'XYFlip', 'p': 0.5}, {'name': 'XYRandomRotate90', 'p': 0.5}, {'masked_pixel_percentage': 0.2, 'name': 'N2VManipulate', 'roi_size': 11, 'strategy': 'uniform', 'struct_mask_axis': 'none', 'struct_mask_span': 5}]}, 'experiment_name': 'mouse_nuclei_n2v', 'training_config': {'accumulate_grad_batches': 1, 'check_val_every_n_epoch': 1, 'checkpoint_callback': {'auto_insert_metric_name': False, 'mode': 'min', 'monitor': 'val_loss', 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'verbose': False}, 'enable_progress_bar': True, 'gradient_clip_algorithm': 'norm', 'max_steps': -1, 'num_epochs': 10, 'precision': '32'}, 'version': '0.1.0'}
Train¶
A CAREamist
can be created using a configuration alone, and then be trained by using the data already loaded in memory.
# build the training engine from the configuration, then fit it on the
# in-memory noisy array (N2V is self-supervised: no targets are needed)
careamist = CAREamist(source=config)
careamist.train(train_source=train_data)
Predict with CAREamics¶
Prediction is done with the same CAREamist
used for training. The images here are small (128x128), so we predict directly on whole images without tiling.
# denoise the training images; no tile_size is passed, so prediction runs
# on whole images rather than tiles
prediction = careamist.predict(source=train_data)
Visualize the prediction¶
# fetch the noise-free (n0) variant of the dataset to use as ground truth
files = sorted(portfolio.denoiseg.MouseNuclei_n0.download())

# load the clean training split as the reference images
gt_data = np.load(files[1])["X_train"]
print(f"GT data shape: {gt_data.shape}")
Downloading file 'denoiseg-MouseNuclei_n0' from 'https://zenodo.org/record/5157001/files/Mouse_n0.zip?download=1' to '/home/melisande.croft/.cache/portfolio'.
100%|█████████████████████████████████████| 12.4M/12.4M [00:00<00:00, 25.6GB/s] Unzipping contents of '/home/melisande.croft/.cache/portfolio/denoiseg-MouseNuclei_n0' to '/home/melisande.croft/.cache/portfolio/denoiseg-MouseNuclei_n0.unzip'
GT data shape: (908, 128, 128)
# images used for the side-by-side comparison
indices = [389, 621]

for i in indices:
    # PSNR of the raw input and of the denoised output, both against GT
    psnr_noisy = scale_invariant_psnr(gt_data[i], train_data[i])
    psnr_denoised = scale_invariant_psnr(gt_data[i], prediction[i].squeeze())

    # one (image, title) pair per panel, so the axis styling is written once
    panels = [
        (train_data[i], f"Noisy Image\nPSNR: {psnr_noisy:.2f}"),
        (prediction[i].squeeze(), f"Denoised Image\nPSNR: {psnr_denoised:.2f}"),
        (gt_data[i], "GT Image"),
    ]

    fig, ax = plt.subplots(1, 3, figsize=(10, 10))
    for axis, (image, title) in zip(ax, panels):
        axis.imshow(image, cmap="gray")
        axis.set_title(title)
        axis.set_xticks([])
        axis.set_yticks([])
    plt.show()
Compute metrics¶
# per-image scale-invariant PSNR over the whole dataset; a comprehension
# replaces the preallocated-zeros index loop
psnrs = np.array(
    [
        scale_invariant_psnr(gt, pred.squeeze())
        for gt, pred in zip(gt_data, prediction)
    ]
)
print(f"PSNR: {np.mean(psnrs):.2f} ± {np.std(psnrs):.2f}")
PSNR: 32.83 ± 1.93
Export the model¶
The model is automatically saved during training (the so-called checkpoints
) and can be loaded back easily, but you can also export the model to the BioImage Model Zoo format.
# create a cover image: left half noisy input, right half denoised output
index_cover = 5
height, width = 128, 128


def _minmax_norm(image):
    """Rescale image values to the [0, 1] range."""
    return (image - image.min()) / (image.max() - image.min())


# normalize train and prediction with the same helper (was duplicated inline)
norm_train = _minmax_norm(train_data[index_cover])
norm_pred = _minmax_norm(prediction[index_cover].squeeze())

# fill in halves
cover = np.zeros((height, width))
cover[:, : width // 2] = norm_train[:height, : width // 2]
cover[:, width // 2 :] = norm_pred[:height, width // 2 :]

# plot the single image
plt.imshow(cover, cmap="gray")

# save as 8-bit grayscale: cast explicitly to uint8 rather than relying on
# PIL's float ("F") -> "L" mode conversion of a float64 array
im = Image.fromarray((cover * 255).astype(np.uint8), mode="L")
im.save("MouseNuclei_Noise2Void.jpeg")
# Export the model
# Package the trained model as a BioImage Model Zoo archive, bundling one
# sample image (as float32) so the archive can validate inference.
careamist.export_to_bmz(
    path_to_archive="mousenuclei_n2v_model.zip",
    friendly_model_name="MouseNuclei_N2V",
    # NOTE(review): `indices` is the [389, 621] list from the comparison
    # section, so image 389 is exported as the example input — confirm intended
    input_array=train_data[[indices[0]]].astype(np.float32),
    authors=[{"name": "CAREamics authors", "affiliation": "Human Technopole"}],
)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] /localscratch/miniforge3/envs/careamics/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
Predicting: | | 0/? [00:00<?, ?it/s]
computing SHA256 of inputs.npy (result: 18e748ccde0eadc2b8fba14e2cb7adecfbc5cec0f2b2f26e8bab8938ed02baf4): 100%|██████████| 1/1 [00:00<00:00, 1373.38it/s] computing SHA256 of outputs.npy (result: edced3dcb9d142475c6a33d5319af53a12fa203944129d8337a2394062d64d1e): 100%|██████████| 1/1 [00:00<00:00, 1628.86it/s] computing SHA256 of environment.yml (result: d743faa28b30f054d6caa699271006e27d3cda48e07c02cf092b4e0732830574): 100%|██████████| 1/1 [00:00<00:00, 2312.19it/s] computing SHA256 of weights.pth (result: 2865b52eed167be33d33962cdbe5952235524dd6b7c2ac62f6e89164f7f66b2e): 100%|██████████| 16/16 [00:00<00:00, 7911.92it/s] computing SHA256 of config.yml (result: 432052f073c6f83dea8cfcd1522f29e0fc6e7460f2b73fbfcb26fd7d074c4cba): 100%|██████████| 1/1 [00:00<00:00, 1701.54it/s] 2024-09-19 18:36:53.880 | Level 30 | bioimageio.spec._internal.field_warning:issue_warning:149 - documentation: No '# Validation' (sub)section found in /home/melisande.croft/.careamics/README.md. 2024-09-19 18:36:53.912 | INFO | bioimageio.core._resource_tests:_test_model_inference:147 - starting 'Reproduce test outputs from test inputs (pytorch_state_dict)' /localscratch/miniforge3/envs/careamics/lib/python3.10/site-packages/bioimageio/core/model_adapters/_pytorch_model_adapter.py:44: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. state: Any = torch.load( 2024-09-19 18:36:54.333 | INFO | bioimageio.core._resource_tests:_test_model_inference_parametrized:242 - Testing inference with 2 different input tensor sizes /localscratch/miniforge3/envs/careamics/lib/python3.10/site-packages/bioimageio/core/model_adapters/_pytorch_model_adapter.py:44: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
state: Any = torch.load( computing SHA256 of config.yml (result: 432052f073c6f83dea8cfcd1522f29e0fc6e7460f2b73fbfcb26fd7d074c4cba): 100%|██████████| 1/1 [00:00<00:00, 279.58it/s] computing SHA256 of inputs.npy (result: 18e748ccde0eadc2b8fba14e2cb7adecfbc5cec0f2b2f26e8bab8938ed02baf4): 100%|██████████| 1/1 [00:00<00:00, 244.78it/s] computing SHA256 of outputs.npy (result: edced3dcb9d142475c6a33d5319af53a12fa203944129d8337a2394062d64d1e): 100%|██████████| 1/1 [00:00<00:00, 292.78it/s] computing SHA256 of environment.yml (result: d743faa28b30f054d6caa699271006e27d3cda48e07c02cf092b4e0732830574): 100%|██████████| 1/1 [00:00<00:00, 288.80it/s] computing SHA256 of weights.pth (result: 2865b52eed167be33d33962cdbe5952235524dd6b7c2ac62f6e89164f7f66b2e): 100%|██████████| 16/16 [00:00<00:00, 2695.03it/s]