"""

.. _datasets_imgs_to_coords:

=================================
Transform images into coordinates
=================================

Create a dataset with coordinates derived from peak statistic identification in images.

Why would you want to do this?

* Compare CBMA and IBMA
* Add more studies to your existing CBMA dataset
* Normalize how coordinates were derived (provided the image data is available)
"""
import os

import matplotlib.pyplot as plt
from nilearn.plotting import plot_stat_map

from nimare.dataset import Dataset
from nimare.extract import download_nidm_pain
from nimare.meta.cbma import ALE
from nimare.transforms import ImagesToCoordinates, ImageTransformer
from nimare.utils import get_resource_path

###############################################################################
# Download data
# -----------------------------------------------------------------------------
dset_dir = download_nidm_pain()

###############################################################################
# Load Dataset
# -----------------------------------------------------------------------------
dset_file = os.path.join(get_resource_path(), "nidm_pain_dset.json")
dset = Dataset(dset_file)
dset.update_path(dset_dir)

# ImagesToCoordinates uses z or p statistical maps
z_transformer = ImageTransformer(target="z")
dset = z_transformer.transform(dset)

study_no_images = "pain_02.nidm-1"
# delete images for study
dset.images = dset.images.query(f"id != '{study_no_images}'")

study_no_coordinates = "pain_03.nidm-1"

# delete coordinates for study
dset.coordinates = dset.coordinates.query(f"id != '{study_no_coordinates}'")


###############################################################################
# Inspect Dataset
# -----------------------------------------------------------------------------

# There is only one study contrast with coordinates, but no images
print(f"studies with only coordinates: {set(dset.coordinates['id']) - set(dset.images['id'])}\n")

print(f"studies with only images: {set(dset.images['id']) - set(dset.coordinates['id'])}\n")

# the images dataframe has z maps as one of the columns
print(f"columns in images dataframe: {dset.images.columns}\n")

# there is no z_stat column in the coordinates dataframe
print(f"columns in coordinates dataframe: {dset.coordinates.columns}\n")


###############################################################################
# Use different strategies to overwrite existing coordinate data
# -----------------------------------------------------------------------------
# There are three choices for how to treat existing coordinate
# data in the dataset which are named: 'fill', 'replace', and 'demolish'.
#
# * 'fill' will only create coordinates for study contrasts with images, but
#   no coordinates. With 'fill' you trust and want to keep all
#   existing coordinate data and the transformer will help "fill" in
#   the blanks for study contrasts with no coordinates
# * 'replace' will create coordinates for study contrasts with images.
#   In addition to filling in the blanks, 'replace' will overwrite existing
#   coordinate data if images are available.
#   However, if image data is not available and only coordinates exist
#   for a particular study contrast, those coordinates will be retained
#   in the resulting dataset.
#   With 'replace', you prefer to have coordinates generated consistently
#   by NiMARE, but you will keep other coordinate data if that particular
#   study contrast does not have images.
# * 'demolish' will create coordinates for study contrasts with images
#   and remove any coordinates from the dataset it cannot overwrite.
#   With 'demolish', you only trust coordinates generated by NiMARE.

# create coordinates from statistical maps
coord_fill = ImagesToCoordinates(merge_strategy="fill")
coord_replace = ImagesToCoordinates(merge_strategy="replace")
coord_demolish = ImagesToCoordinates(merge_strategy="demolish")

dset_fill = coord_fill.transform(dset)
dset_replace = coord_replace.transform(dset)
dset_demolish = coord_demolish.transform(dset)

###############################################################################
# Inspect generated datasets
# -----------------------------------------------------------------------------

example_study = "pain_01.nidm-1"

print(f"no coordinate data for {study_no_coordinates}")
assert study_no_coordinates not in dset.coordinates["id"]

# 'fill' will add coordinates for study without coordinates
print(f"'fill' strategy for study {study_no_coordinates}")
print(dset_fill.coordinates.query(f"id == '{study_no_coordinates}'"))
print("\n\n")


# 'replace' will change the data for studies with images
print(f"original data for study {example_study}")
print(dset.coordinates.query(f"id == '{example_study}'"))
print(f"'replace' strategy for study {example_study}")
print(dset_replace.coordinates.query(f"id == '{example_study}'"))

# 'demolish' will remove studies that do not have images
print(f"'demolish' strategy for study {study_no_images}")
assert study_no_images not in dset.coordinates["id"]

# while studies with only coordinates (no images) are in 'replace',
# they are removed from 'demolish'.
print(
    "studies in 'replace', but not 'demolish': "
    f"{set(dset_replace.coordinates['id']) - set(dset_demolish.coordinates['id'])}"
)

###############################################################################
# ALE (CBMA)
# -----------------------------------------------------------------------------
# Run a meta analysis using each of the strategies.
# The biggest difference is between 'fill' and the other two strategies.
# The difference is because in 'fill' most of the original coordinates
# in the dataset are used, whereas with 'replace' and 'demolish' the
# majority/all of the coordinates are generated by NiMARE.

ale = ALE()
res_fill = ale.fit(dset_fill)
res_replace = ale.fit(dset_replace)
res_demolish = ale.fit(dset_demolish)
fig, axs = plt.subplots(3, figsize=(6, 8))
for ax, strat, res in zip(
    axs, ["fill", "replace", "demolist"], [res_fill, res_replace, res_demolish]
):
    plot_stat_map(
        res.get_map("z"),
        cut_coords=[0, 0, -8],
        draw_cross=False,
        cmap="RdBu_r",
        axes=ax,
        title=f"'{strat}' strategy",
    )

fig.show()


###############################################################################
# Tracking positive and negative z-scores
# -----------------------------------------------------------------------------
# There is a new column in the transformed coordinates, ``z_stat``.
# This column contains the z-score of the individual peak.
# Currently, no CBMA algorithm implemented in NiMARE takes advantage
# of z-scores, but we can still take advantage of whether the peak was positive
# or negative by running a CBMA on positive and negative z-scores separately,
# testing the convergence of positive and negative z-scores separately.

coord_two_sided = ImagesToCoordinates(merge_strategy="demolish", two_sided=True)

dset_two_sided = coord_two_sided.transform(dset)

dset_positive = dset_two_sided.copy()
dset_negative = dset_two_sided.copy()
dset_positive.coordinates = dset_two_sided.coordinates.query("z_stat >= 0.0")
dset_negative.coordinates = dset_two_sided.coordinates.query("z_stat < 0.0")

# plot the results
ale = ALE()
res_positive = ale.fit(dset_positive)
res_negative = ale.fit(dset_negative)
fig, axs = plt.subplots(2, figsize=(6, 6))
for ax, sign, res in zip(axs, ["positive", "negative"], [res_positive, res_negative]):
    plot_stat_map(
        res.get_map("z"),
        cut_coords=[0, 0, -8],
        draw_cross=False,
        cmap="RdBu_r",
        axes=ax,
        title=f"'{sign}' z-scores",
    )

fig.show()
