"""
Fetch data using pooch.
This is inspired by scipy's datasets module.
""" # numpydoc ignore=EX01
import pathlib
import sys
import pooch
from tqdm.auto import tqdm
__all__ = ["DOWNLOADABLE_FILES", "fetch_data"]
def __dir__() -> list[str]:
return __all__
# If you add a file here:
# - add the corresponding entry to REGISTRY_URLS
# - it is useful for users, add it to DOWNLOADABLE_FILES
REGISTRY = {
"plenoptic-test-files.tar.gz": "a6b8e03ecc8d7e40c505c88e6c767af5da670478d3bebb4e13a9d08ee4f39ae8", # noqa: E501
"ssim_images.tar.gz": "19c1955921a3c37d30c88724fd5a13bdbc9620c9e7dfaeaa3ff835283d2bb42e", # noqa: E501
"ssim_analysis.mat": "921d324783f06d1a2e6f1ce154c7ba9204f91c569772936991311ff299597f24", # noqa: E501
"msssim_images.tar.gz": "a01273c95c231ba9e860dfc48f2ac8044ac3db13ad7061739c29ea5f9f20382c", # noqa: E501
"MAD_results.tar.gz": "29794ed7dc14626f115b9e4173bff88884cb356378a1d4f1f6cd940dd5b31dbe", # noqa: E501
"portilla_simoncelli_matlab_test_vectors.tar.gz": "83087d4d9808a3935b8eb4197624bbae19007189cd0d786527084c98b0b0ab81", # noqa: E501
"portilla_simoncelli_test_vectors.tar.gz": "d67787620a0cf13addbe4588ec05b276105ff1fad46e72f8c58d99f184386dfb", # noqa: E501
"portilla_simoncelli_images.tar.gz": "4d3228fbb51de45b4fc81eba590d20f5861a54d9e46766c8431ab08326e80827", # noqa: E501
"portilla_simoncelli_synthesize.npz": "9c304580cd60e0275a2ef663187eccb71f2e6a31883c88acf4c6a699f4854c80", # noqa: E501
"portilla_simoncelli_synthesize_torch_v1.12.0.npz": "5a76ef223bac641c9d48a0b7f49b3ce0a05c12a48e96cd309866b1e7d5e4473f", # noqa: E501
"portilla_simoncelli_synthesize_gpu.npz": "324efc2a6c54382aae414d361c099394227b56cd24460eebab2532f70728c3ee", # noqa: E501
"portilla_simoncelli_scales.npz": "eae2db6bd5db7d37c28d8f8320c4dd4fa5ab38294f5be22f8cf69e5cd5e4936a", # noqa: E501
"sample_images.tar.gz": "0ba6fe668a61e9f3cb52032da740fbcf32399ffcc142ddb14380a8e404409bf5", # noqa: E501
"test_images.tar.gz": "eaf35f5f6136e2d51e513f00202a11188a85cae8c6f44141fb9666de25ae9554", # noqa: E501
"tid2013.tar.gz": "bc486ac749b6cfca8dc5f5340b04b9bb01ab24149a5f3a712f13e9d0489dcde0", # noqa: E501
"portilla_simoncelli_test_vectors_refactor.tar.gz": "b72661836e5830c1473b8a2292075a8e9c1aca00faf97cc6809ec28f19d3f9ce", # noqa: E501
"portilla_simoncelli_synthesize_torch_v1.12.0_ps-refactor.npz": "9525844b71cf81509b86ed9677172745353588c6bb54e4de8000d695598afa47", # noqa: E501
"portilla_simoncelli_synthesize_gpu_ps-refactor.npz": "9fbb490f1548133f6aa49c54832130cf70f8dc6546af59688ead17f62ab94e61", # noqa: E501
"portilla_simoncelli_scales_ps-refactor.npz": "ce11d85e6bcf5fad1b819c36dac584c3e933706a0ee423ea1c76ffe0daccbae5", # noqa: E501
"portilla_simoncelli_synthesize_torch_v1.12.0_ps-refactor-2.npz": "ffd967543d58a03df390008c35878791590520624aa0e5e5a26ad3f877345ab4", # noqa: E501
"example_eigendistortion.pt": "87080836713e8efe1e7ff29538099e82a26b8700080e1bc1d30f00de1a54b2f5", # noqa: E501
"load_image_test.tar.gz": "8a2b92dc0d442695c45b1e908ef0a04cae35c5f21b774a93b9fc6b675423b526", # noqa: E501
"berardino_onoff.pt": "2174a40005489b9c94acc91213b2f6d57a75f262caf118cb1980658eadbfd047", # noqa: E501
"berardino_vgg16.pt": "5e0d10f4a367244879cd8a61c453992370ab801db1f66e10caa1ee2ecfab8ca4", # noqa: E501
"ps_regression.tar.gz": "dcb92f7df6344e7f6760c16ece4395245d43703bf6783629272549674f753faf", # noqa: E501
"example_metamer_gaussian-old.pt": "adef079df878a9e0319cb5ad59791435f9b7eec695e1d8f21019c8e11b891d85", # noqa: E501
"example_metamer_gaussian.pt": "02e12c7c2a93e2e5a83f6d7aa8320368c00641deacfba3359b02fedc9a0dc0f1", # noqa: E501
"example_metamer_gaussian-cuda.pt": "edd80e63bd776b679f714acee62fefa9885a257c66e2699423887aeab7c03794", # noqa: E501
"example_metamerCTF_ps.pt": "060362bb4146649511cf0b8c069450811f1ef842e3763014fbb00808966067d2", # noqa: E501
"example_metamerCTF_ps-cuda.pt": "443dbdec5dbc2a7ffb26fc8076981142d14ab9cee36b294f4d890020a4838816", # noqa: E501
"example_mad.pt": "583c60eab6cfb5c5b031af4960db41cc0db767492871182be06da224cd133767", # noqa: E501
"example_mad-cuda.pt": "fd7e1372397bb57cc31a13ca4886ee73ffd405df64e0bb7d291977ba1b460b77", # noqa: E501
"example_eigendistortion_color.pt": "63147c5ed9588a64b6af4f181a8d0532d3de5639b20ae79c4706ec488c1854dc", # noqa: E501
}
OSF_TEMPLATE = "https://osf.io/download/{}"
# these are all from the OSF project at https://osf.io/ts37w/.
REGISTRY_URLS = {
"plenoptic-test-files.tar.gz": OSF_TEMPLATE.format("q9kn8"),
"ssim_images.tar.gz": OSF_TEMPLATE.format("j65tw"),
"ssim_analysis.mat": OSF_TEMPLATE.format("ndtc7"),
"msssim_images.tar.gz": OSF_TEMPLATE.format("5fuba"),
"MAD_results.tar.gz": OSF_TEMPLATE.format("jwcsr"),
"portilla_simoncelli_matlab_test_vectors.tar.gz": OSF_TEMPLATE.format("qtn5y"),
"portilla_simoncelli_test_vectors.tar.gz": OSF_TEMPLATE.format("8r2gq"),
"portilla_simoncelli_images.tar.gz": OSF_TEMPLATE.format("eqr3t"),
"portilla_simoncelli_synthesize.npz": OSF_TEMPLATE.format("a7p9r"),
"portilla_simoncelli_synthesize_torch_v1.12.0.npz": OSF_TEMPLATE.format("gbv8e"),
"portilla_simoncelli_synthesize_gpu.npz": OSF_TEMPLATE.format("tn4y8"),
"portilla_simoncelli_scales.npz": OSF_TEMPLATE.format("xhwv3"),
"sample_images.tar.gz": OSF_TEMPLATE.format("6drmy"),
"test_images.tar.gz": OSF_TEMPLATE.format("au3b8"),
"tid2013.tar.gz": OSF_TEMPLATE.format("uscgv"),
"portilla_simoncelli_test_vectors_refactor.tar.gz": OSF_TEMPLATE.format("ca7qt"),
"portilla_simoncelli_synthesize_torch_v1.12.0_ps-refactor.npz": OSF_TEMPLATE.format(
"vmwzd"
),
"portilla_simoncelli_synthesize_gpu_ps-refactor.npz": OSF_TEMPLATE.format("mqs6y"),
"portilla_simoncelli_scales_ps-refactor.npz": OSF_TEMPLATE.format("nvpr4"),
"portilla_simoncelli_synthesize_torch_v1.12.0_ps-refactor-2.npz": OSF_TEMPLATE.format( # noqa: E501
"en8du"
),
"example_eigendistortion.pt": OSF_TEMPLATE.format("gwhz2"),
"load_image_test.tar.gz": OSF_TEMPLATE.format("avpzq"),
"berardino_onoff.pt": OSF_TEMPLATE.format("uqfa8"),
"berardino_vgg16.pt": OSF_TEMPLATE.format("6r87b"),
"ps_regression.tar.gz": OSF_TEMPLATE.format("7t4fj/?revision=15"),
"example_metamer_gaussian-old.pt": OSF_TEMPLATE.format("7e48u/?revision=5"),
"example_metamer_gaussian.pt": OSF_TEMPLATE.format("7e48u/?revision=6"),
"example_metamer_gaussian-cuda.pt": OSF_TEMPLATE.format("jzhe7/?revision=3"),
"example_metamerCTF_ps.pt": OSF_TEMPLATE.format("4zr37/?revision=10"),
"example_metamerCTF_ps-cuda.pt": OSF_TEMPLATE.format("627sp/?revision=5"),
"example_eigendistortion_color.pt": OSF_TEMPLATE.format("jc63h/?revision=3"),
"example_mad.pt": OSF_TEMPLATE.format("ersfy/?revision=4"),
"example_mad-cuda.pt": OSF_TEMPLATE.format("qjdbc/?revision=3"),
}
#: List of files that can be downloaded using :func:`~plenoptic.data.fetch_data`
DOWNLOADABLE_FILES = [
"portilla_simoncelli_images.tar.gz",
"test_images.tar.gz",
"example_eigendistortion.pt",
"berardino_onoff.pt",
"berardino_vgg16.pt",
"tid2013.tar.gz",
"ps_regression.tar.gz",
"example_metamer_gaussian.pt",
"example_metamerCTF_ps.pt",
"example_mad.pt",
"example_eigendistortion_color.pt",
]
retriever = pooch.create(
# Use the default cache folder for the operating system
# Pooch uses appdirs (https://github.com/ActiveState/appdirs) to
# select an appropriate directory for the cache on each platform.
path=pooch.os_cache("plenoptic"),
base_url="",
urls=REGISTRY_URLS,
registry=REGISTRY,
retry_if_failed=2,
allow_updates="POOCH_ALLOW_UPDATES",
env="PLENOPTIC_CACHE_DIR",
)
def _find_shared_directory(paths: list[pathlib.Path]) -> pathlib.Path:
"""
Find directory shared by all paths.
Helper function for when downloading tar archives.
Parameters
----------
paths
List of paths to check.
Returns
-------
shared_dir
Most recent common ancestor.
""" # numpydoc ignore=EX01
for dir in paths[0].parents:
if all([dir in p.parents for p in paths]):
break
return dir
[docs]
def fetch_data(dataset_name: str) -> pathlib.Path:
"""
Download data, using pooch. These are largely used for testing.
To view list of downloadable files, look at
:const:`~plenoptic.data.DOWNLOADABLE_FILES`.
This checks whether the data already exists and is unchanged and downloads
again, if necessary. If dataset_name ends in .tar.gz, this also
decompresses and extracts the archive, returning the Path to the resulting
directory. Else, it just returns the Path to the downloaded file.
Parameters
----------
dataset_name
Name of the dataset to download.
Returns
-------
path
Path of the downloaded dataset.
Examples
--------
.. plot::
>>> import plenoptic as po
>>> path = po.data.fetch_data("portilla_simoncelli_images.tar.gz")
>>> len(list(path.glob("*")))
38
>>> img = po.load_images(path / "fig3b.jpg")
>>> po.plot.imshow(img)
<PyrFigure size ...>
"""
processor = pooch.Untar() if dataset_name.endswith(".tar.gz") else None
use_ascii = bool(sys.platform == "win32")
fname = retriever.fetch(
dataset_name,
progressbar=tqdm(
total=1,
ncols=79,
unit_scale=True,
delay=1e-5,
leave=True,
unit="B",
ascii=use_ascii,
),
processor=processor,
)
if dataset_name.endswith(".tar.gz"):
fname = _find_shared_directory([pathlib.Path(f) for f in fname])
else:
fname = pathlib.Path(fname)
return fname