From 2acafc650cfc849e29a6aac444b1f9fa451cebe1 Mon Sep 17 00:00:00 2001 From: Neil Shephard Date: Fri, 12 Jan 2024 12:01:12 +0000 Subject: [PATCH 1/8] Restore cmap to config; simpler config of cmap/dpi/image format Closes #776 + Aligns command line and configuration field names with those in matplotlibrc files. + Restores the `cmap` configuration option to `default_config.yaml` and introduces `savefig_dpi` option. + Adds command line options for setting DPI (`--savefig-dpi`), Colormap (`--cmap`) and Output file format (`--savefig-format`). + Expands documentation on how to use custom configuration files or command line options to set the DPI/Colormap/Output format. + Updates the header to `topostats.mplstyle` to explain how to use it as typically users will have created a copy of the file (after the convenience function `topostats create-matplotlibrc` was introduced with #773). + To achieve this the dictionary `config["plotting"]` needed explicitly updating as the `update_config()` function doesn't update nested configurations (since this is the first PR that introduces command line options that modify any of the values in the nested dictionaries). + Updates options for `topostats toposum` to align with `savefig_format` and adds flag to entry point so output format is consistent. + Updates and expands the configuration documentation explaining how to use these conveniences. As a consequence quite a few files are touched to ensure that validation and processing functions all have variables that align with those in the configuration. If users could test this it would be very much appreciated. If you use the Git installed version something like the following would switch branches and allow you to test it. ``` conda create --name topostats-config # Create and activate a virtual env specific to this conda activate topostats-config cd ~/path/to/TopoStats git pull git checkout ns-rse/776-config-jigging pip install -e . 
topostats process --output-dir base topostats create-config test_config.yaml # Create test_config.yaml to try changing parameters topostats process --config test_config.yaml --output-dir test1 topostats process --output-dir test2 --savefig-dpi 10 --cmap rainbow --savefig-format svg topostats process --config test_config.yaml --output-dir test3 --savefig-dpi 80 --cmap viridis --savefig-format pdf ``` Each invocation of `topostats process` will save output to its own directory (either `base`, `test1`, `test2` and `test3`) for comparison. There should be differences between each `base` the values used in `test_config.yaml` and saved under `test1` and those under `test2` and `test3` should also differ. I would really appreciate feedback on the documentation as without clear documentation it is perhaps confusing how the components interact and work and can be modified and getting this as clear as possible will be really helpful. --- docs/configuration.md | 228 +++++++++++++++++++++-------- tests/test_plotting.py | 3 +- tests/test_plottingfuncs.py | 3 +- tests/test_processing.py | 5 +- topostats/default_config.yaml | 4 +- topostats/entry_point.py | 35 ++++- topostats/plotting.py | 10 +- topostats/plotting_dictionary.yaml | 68 ++++----- topostats/plottingfuncs.py | 29 ++-- topostats/processing.py | 6 +- topostats/run_topostats.py | 16 +- topostats/summary_config.yaml | 2 +- topostats/topostats.mplstyle | 23 +-- topostats/utils.py | 1 + topostats/validation.py | 90 +++++++----- 15 files changed, 349 insertions(+), 174 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 4e28f4347ff..97c266c3070 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -54,55 +54,58 @@ above: Aside from the comments in YAML file itself the fields are described below. 
-| Section | Sub-Section | Data Type | Default | Description | -| :-------------- | :-------------------------------- | :--------- | :-------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `base_dir` | | string | `./` | Directory to recursively search for files within.[^1] | -| `output_dir` | | string | `./output` | Directory that output should be saved to.[^1] | -| `log_level` | | string | `info` | Verbosity of logging, options are (in increasing order) `warning`, `error`, `info`, `debug`. | -| `cores` | | integer | `2` | Number of cores to run parallel processes on. | -| `file_ext` | | string | `.spm` | File extensions to search for. | -| `loading` | `channel` | string | `Height` | The channel of data to be processed, what this is will depend on the file-format you are processing and the channel you wish to process. | -| `filter` | `run` | boolean | `true` | Whether to run the filtering stage, without this other stages won't run so leave as `true`. | -| | `threshold_method` | str | `std_dev` | Threshold method for filtering, options are `ostu`, `std_dev` or `absolute`. | -| | `otsu_threshold_multiplier` | float | `1.0` | Factor by which the derived Otsu Threshold should be scaled. | -| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | -| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first) and above (second) absolute threshold for separating data from the image background. 
| -| | `gaussian_size` | float | `0.5` | The number of standard deviations to build the Gaussian kernel and thus affects the degree of blurring. See [skimage.filters.gaussian](https://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.gaussian) and `sigma` for more information. | -| | `gaussian_mode` | string | `nearest` | | -| `grains` | `run` | boolean | `true` | Whether to run grain finding. Options `true`, `false` | -| | `row_alignment_quantile` | float | `0.5` | Quantile (0.0 to 1.0) to be used to determine the average background for the image. below values may improve flattening of large features. | -| | `smallest_grain_size_nm2` | int | `100` | The smallest size of grains to be included (in nm^2), anything smaller than this is considered noise and removed. **NB** must be `> 0.0`. | -| | `threshold_method` | float | `std_dev` | Threshold method for grain finding. Options : `otsu`, `std_dev`, `absolute` | -| | `otsu_threshold_multiplier` | | `1.0` | Factor by which the derived Otsu Threshold should be scaled. | -| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | -| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first), above (second) absolute threshold for separating grains from the image background. | -| | `direction` | | `above` | Defines whether to look for grains above or below thresholds or both. Options: `above`, `below`, `both` | -| | `smallest_grain_size` | int | `50` | Catch-all value for the minimum size of grains. Measured in nanometres squared. All grains with area below than this value are removed. 
| -| | `absolute_area_threshold` | dictionary | `[300, 3000], [null, null]` | Area thresholds for above the image background (first) and below the image background (second), which grain sizes are permitted, measured in nanometres squared. All grains outside this area range are removed. | -| | `remove_edge_intersecting_grains` | boolean | `true` | Whether to remove grains that intersect the image border. _Do not change this unless you know what you are doing_. This will ruin any statistics relating to grain size, shape and DNA traces. | -| `grainstats` | `run` | boolean | `true` | Whether to calculate grain statistics. Options : `true`, `false` | -| | `cropped_size` | float | `40.0` | Force cropping of grains to this length (in nm) of square cropped images (can take `-1` for grain-sized box) | -| | `edge_detection_method` | str | `binary_erosion` | Type of edge detection method to use when determining the edges of grain masks before calculating statistics on them. Options : `binary_erosion`, `canny`. | -| `dnatracing` | `run` | boolean | `true` | Whether to run DNA Tracing. Options : true, false | -| | `min_skeleton_size` | int | `10` | The minimum number of pixels a skeleton should be for statistics to be calculated on it. Anything smaller than this is dropped but grain statistics are retained. | -| | `skeletonisation_method` | str | `topostats` | Skeletonisation method to use, possible options are `zhang`, `lee`, `thin` (from [Scikit-image Morphology module](https://scikit-image.org/docs/stable/api/skimage.morphology.html)) or the original bespoke TopoStas method `topostats`. | -| | `spline_step_size` | float | `7.0e-9` | The sampling rate of the spline in metres. This is the frequency at which points are sampled from fitted traces to act as guide points for the splining process using scipy's splprep. | -| | `spline_linear_smoothing` | float | `5.0` | The amount of smoothing to apply to splines of linear molecule traces. 
| -| | `spline_circular_smoothing` | float | `0.0` | The amount of smoothing to apply to splines of circular molecule traces. | -| | `pad_width` | int | 10 | Padding for individual grains when tracing. This is sometimes required if the bounding box around grains is too tight and they touch the edge of the image. | -| | `cores` | int | 1 | Number of cores to use for tracing. **NB** Currently this is NOT used and should be left commented in the YAML file. | -| `plotting` | `run` | boolean | `true` | Whether to run plotting. Options : `true`, `false` | -| | `style` | str | `topostats.mplstyle` | The default loads a custom [matplotlibrc param file](https://matplotlib.org/stable/users/explain/customizing.html#the-matplotlibrc-file) that comes with TopoStats. Users can specify the path to their own style file as an alternative. | -| | `save_format` | string | `png` | Format to save images in, see [matplotlib.pyplot.savefig](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html) | -| | `pixel_interpolation` | string | null | Interpolation method for image plots. Recommended default 'null' prevents banding that occurs in some images. If interpolation is needed, we recommend `gaussian`. See [matplotlib imshow interpolations documentation](https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html) for details. | -| | `image_set` | string | `all` | Which images to plot. Options : `all`, `core` | -| | `zrange` | list | `[0, 3]` | Low (first number) and high (second number) height range for core images (can take [null, null]). **NB** `low <= high` otherwise you will see a `ValueError: minvalue must be less than or equal to maxvalue` error. | -| | `colorbar` | boolean | `true` | Whether to include the colorbar scale in plots. Options `true`, `false` | -| | `axes` | boolean | `true` | Whether to include the axes in the produced plots. | -| | `num_ticks` | null / int | `null` | Number of ticks to have along the x and y axes. 
Options : `null` (auto) or an integer >1 | -| | `histogram_log_axis` | boolean | `false` | Whether to plot hisograms using a logarithmic scale or not. Options: `true`, `false`. | -| `summary_stats` | `run` | boolean | `true` | Whether to generate summary statistical plots of the distribution of different metrics grouped by the image that has been processed. | -| | `config` | str | `null` | Path to a summary config YAML file that configures/controls how plotting is done. If one is not specified either the command line argument `--summary_config` value will be used or if that option is not invoked the default `topostats/summary_config.yaml` will be used. | +| Section | Sub-Section | Data Type | Default | Description | +| :-------------- | :-------------------------------- | :------------- | :-------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `base_dir` | | string | `./` | Directory to recursively search for files within.[^1] | +| `output_dir` | | string | `./output` | Directory that output should be saved to.[^1] | +| `log_level` | | string | `info` | Verbosity of logging, options are (in increasing order) `warning`, `error`, `info`, `debug`. | +| `cores` | | integer | `2` | Number of cores to run parallel processes on. | +| `file_ext` | | string | `.spm` | File extensions to search for. | +| `loading` | `channel` | string | `Height` | The channel of data to be processed, what this is will depend on the file-format you are processing and the channel you wish to process. | +| `filter` | `run` | boolean | `true` | Whether to run the filtering stage, without this other stages won't run so leave as `true`. 
| +| | `threshold_method` | str | `std_dev` | Threshold method for filtering, options are `otsu`, `std_dev` or `absolute`. | +| | `otsu_threshold_multiplier` | float | `1.0` | Factor by which the derived Otsu Threshold should be scaled. | +| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | +| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first) and above (second) absolute threshold for separating data from the image background. | +| | `gaussian_size` | float | `0.5` | The number of standard deviations to build the Gaussian kernel and thus affects the degree of blurring. See [skimage.filters.gaussian](https://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.gaussian) and `sigma` for more information. | +| | `gaussian_mode` | string | `nearest` | | +| `grains` | `run` | boolean | `true` | Whether to run grain finding. Options `true`, `false` | +| | `row_alignment_quantile` | float | `0.5` | Quantile (0.0 to 1.0) to be used to determine the average background for the image. below values may improve flattening of large features. | +| | `smallest_grain_size_nm2` | int | `100` | The smallest size of grains to be included (in nm^2), anything smaller than this is considered noise and removed. **NB** must be `> 0.0`. | +| | `threshold_method` | str | `std_dev` | Threshold method for grain finding. Options : `otsu`, `std_dev`, `absolute` | +| | `otsu_threshold_multiplier` | float | `1.0` | Factor by which the derived Otsu Threshold should be scaled. 
| +| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | +| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first), above (second) absolute threshold for separating grains from the image background. | +| | `direction` | | `above` | Defines whether to look for grains above or below thresholds or both. Options: `above`, `below`, `both` | +| | `smallest_grain_size` | int | `50` | Catch-all value for the minimum size of grains. Measured in nanometres squared. All grains with area below than this value are removed. | +| | `absolute_area_threshold` | dictionary | `[300, 3000], [null, null]` | Area thresholds for above the image background (first) and below the image background (second), which grain sizes are permitted, measured in nanometres squared. All grains outside this area range are removed. | +| | `remove_edge_intersecting_grains` | boolean | `true` | Whether to remove grains that intersect the image border. _Do not change this unless you know what you are doing_. This will ruin any statistics relating to grain size, shape and DNA traces. | +| `grainstats` | `run` | boolean | `true` | Whether to calculate grain statistics. Options : `true`, `false` | +| | `cropped_size` | float | `40.0` | Force cropping of grains to this length (in nm) of square cropped images (can take `-1` for grain-sized box) | +| | `edge_detection_method` | str | `binary_erosion` | Type of edge detection method to use when determining the edges of grain masks before calculating statistics on them. Options : `binary_erosion`, `canny`. | +| `dnatracing` | `run` | boolean | `true` | Whether to run DNA Tracing. 
Options : `true`, `false` | +| | `min_skeleton_size` | int | `10` | The minimum number of pixels a skeleton should be for statistics to be calculated on it. Anything smaller than this is dropped but grain statistics are retained. | +| | `skeletonisation_method` | str | `topostats` | Skeletonisation method to use, possible options are `zhang`, `lee`, `thin` (from [Scikit-image Morphology module](https://scikit-image.org/docs/stable/api/skimage.morphology.html)) or the original bespoke TopoStats method `topostats`. | +| | `spline_step_size` | float | `7.0e-9` | The sampling rate of the spline in metres. This is the frequency at which points are sampled from fitted traces to act as guide points for the splining process using scipy's splprep. | +| | `spline_linear_smoothing` | float | `5.0` | The amount of smoothing to apply to splines of linear molecule traces. | +| | `spline_circular_smoothing` | float | `0.0` | The amount of smoothing to apply to splines of circular molecule traces. | +| | `pad_width` | int | `10` | Padding for individual grains when tracing. This is sometimes required if the bounding box around grains is too tight and they touch the edge of the image. | +| | `cores` | int | `1` | Number of cores to use for tracing. **NB** Currently this is NOT used and should be left commented in the YAML file. | +| `plotting` | `run` | boolean | `true` | Whether to run plotting. Options : `true`, `false` | +| | `style` | str | `topostats.mplstyle` | The default loads a custom [matplotlibrc param file](https://matplotlib.org/stable/users/explain/customizing.html#the-matplotlibrc-file) that comes with TopoStats. Users can specify the path to their own style file as an alternative. 
| +| | `savefig_format` | string | `null` | Format to save images in, `null` defaults to `png`, see [matplotlib.pyplot.savefig](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html) | +| | `savefig_dpi` | string / float | `null` | Dots Per Inch (DPI), if `null` then the value `figure` is used, for other values (typically integers) see [Further customisation](#further-customisation) and [Matplotlib](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html). | +| | `pixel_interpolation` | string | `null` | Interpolation method for image plots. Recommended default 'null' prevents banding that occurs in some images. If interpolation is needed, we recommend `gaussian`. See [matplotlib imshow interpolations documentation](https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html) for details. | +| | `image_set` | string | `all` | Which images to plot. Options : `all`, `core` | +| | `zrange` | list | `[0, 3]` | Low (first number) and high (second number) height range for core images (can take [null, null]). **NB** `low <= high` otherwise you will see a `ValueError: minvalue must be less than or equal to maxvalue` error. | +| | `colorbar` | boolean | `true` | Whether to include the colorbar scale in plots. Options `true`, `false` | +| | `axes` | boolean | `true` | Whether to include the axes in the produced plots. | +| | `num_ticks` | null / int | `null` | Number of ticks to have along the x and y axes. Options : `null` (auto) or an integer >1 | +| | `cmap` | string | `null` | Colormap/colourmap to use (defaults to 'nanoscope' if null, as defined in `topostats/topostats.mplstyle`). Other options are 'afmhot', 'viridis' etc., see [Matplotlib : Choosing Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html). | +| | `mask_cmap` | string | `blu` | Color used when masking regions. Options `blu`, `jet_r` or any valid Matplotlib colour. 
| +| | `histogram_log_axis` | boolean | `false` | Whether to plot histograms using a logarithmic scale or not. Options: `true`, `false`. | +| `summary_stats` | `run` | boolean | `true` | Whether to generate summary statistical plots of the distribution of different metrics grouped by the image that has been processed. | +| | `config` | str | `null` | Path to a summary config YAML file that configures/controls how plotting is done. If one is not specified either the command line argument `--summary_config` value will be used or if that option is not invoked the default `topostats/summary_config.yaml` will be used. | ## Summary Configuration @@ -140,8 +143,8 @@ TopoStats generates a number of images of the scans at various steps in the proc Python library [Matplotlib](matplotlib.org/stable/). A custom [`matplotlibrc`](https://matplotlib.org/stable/users/explain/customizing.html#the-matplotlibrc-file) file is included in TopoStats which defines the default parameters for generating images. This covers _all_ aspects of a plot that can be -customised, for example we define custom colour maps `nanoscope` and `afmhot` and by default the former is configured to -be used in this file. Other parameters that are customised are the `font.size` which affects axis labels and titles. +customised, for example we define custom colour maps `nanoscope` and `afmhot`. By default the former is configured to +be used. Other parameters that are customised are the `font.size` which affects axis labels and titles. If you wish to modify the look of all images that are output you can generate a copy of the default configuration using `topostats create-matplotlibrc` command which will write the output to `topostats.mplstyle` by default (**NB** there are @@ -165,28 +168,131 @@ through the basics. 
### Further customisation -Whilst the broad overall look of images is controlled in this manner there is one additional file that controls how +Whilst the overall look of images is controlled in this manner there is one additional file that controls how images are plotted in terms of filenames, titles and image types and whether an image is part of the `core` subset that are always generated or not. -During development it was found that setting high DPI took a long time to generate and save some of the images which -slowed down the overall processing time. The solution we have implemented is a file `topostats/plotting_dictionary.yaml` -which sets these parameters on a per-image basis and these over-ride settings defined in default `topostats.mplstyle` or -any user generated document. +This is the `topostats/plotting_dictionary.yaml` which for each image stage defines whether it is a component of the +`core` subset of images that are always generated, sets the `filename`, the `title` on the plot, the `image_type` +(whether it is a binary image), the `savefig_dpi` which controls the Dots Per Inch (essentially the resolution). Each +image has the following structure. -If you have to change these, for example if there is a particular image not included in the `core` set that you always -want produced or you wish to change the DPI (Dots Per Inch) of a particular image you will have to locate this file and -manually edit it. Where this is depends on how you have installed TopoStats, if it is from a clone of the Git repository -then it can be found in `TopoStats/topostats/plotting_dictionary.yaml`. 
If you have installed from PyPI using `pip -install topostats` then it will be under the virtual environment you have created +```yaml +z_threshed: + title: "Height Thresholded" + image_type: "non-binary" + savefig_dpi: 100 + core_set: true +``` + +The following section describes how to override the DPI settings defined in this file and change the global `cmap` +(colormap/colourmap) used in plotting and output format. + +#### DPI + +During development it was found that setting high DPI globally for all images had a detrimental impact on processing +speeds, slowing down the overall processing time. The solution we have implemented is to use the +`topostats/plotting_dictionary.yaml` file and set the `savefig_dpi` parameter on a per-image basis. + +If you wish to change the DPI there are two options. You can change the value for _all_ images by modifying the setting +in a [custom configuration](#generating-a-configuration), changing `savefig_dpi` from `null` to your desired +value. The example below shows a section of the configuration file you can generate with this value set to `400`. + +```yaml +plotting: + run: true # Options : true, false + style: topostats.mplstyle # Options : topostats.mplstyle or path to a matplotlibrc params file + savefig_format: null # Options : null (defaults to png) or see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html + savefig_dpi: 400 # Options : null (defaults to figure) see https://afm-spm.github.io/TopoStats/main/configuration.html#further-customisation and https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html +``` + +The value in the configuration file (or the default if none is specified) can also be configured at run-time +using the `--savefig-dpi ###` option to `topostats process`. This will over-ride both the default and any value +specified in a custom configuration you may have set. 
The following sets this to `400`. + +```bash +topostats process --savefig-dpi 400 +``` + +**NB** Changing the DPI in this manner will apply to _all_ images and may significantly reduce processing speed as it +takes longer to write images with high DPI to disk. + +If you wish to have fine-grained control over the DPI on a per-image basis when batch processing then your only recourse +is to change the values in `topostats/plotting_dictionary.yaml`. The location of this file depends on how you have installed +TopoStats. If it is from a clone of the Git repository then it can be found in +`TopoStats/topostats/plotting_dictionary.yaml`. If you have installed from PyPI using `pip install topostats` then it +will be under the virtual environment you have created e.g. `~/.virtualenvs/topostats/lib/python3.11/site-packages/topostats/topostats/plotting_dictionary.yaml` if you are using plain virtual environments or `~/miniconda3/envs/topostats/lib/python3.11/site-packages/topostats/topostats/plotting_dictionary.yaml` if you are using Conda environments and chose `~/miniconda3` as the base directory when installing Conda. +If you have installed TopoStats from the cloned Git repository the file will be under +`TopoStats/topostats/plotting_dictionary.yaml`. + **NB** The exact location will be highly specific to your system so the above are just guides as to where to find things. +#### Colormap + +The colormap used to plot images is set globally in `topostats/default_config.yaml`. TopoStats includes two custom +colormaps `nanoscope` and `afmhot` but any colormap recognised by Matplotlib can be used (see the [Matplotlib Colormap +reference](https://matplotlib.org/stable/gallery/color/colormap_reference.html) for choices). + +If you want to modify the colormap that is used you have two options. Firstly you can [generate a +configuration](#generating-a-configuration) file and modify the field `cmap` to your choice. 
The example below shows +changing this from `null` (which defaults to `nanoscope` as defined in `topostats.mplstyle`) to `rainbow`. + +```yaml +plotting: + ... + cmap: rainbow # Colormap/colourmap to use (default is 'nanoscope' which is used if null, other options are 'afmhot', 'viridis' etc.) +``` + +Alternatively it is possible to specify the colormap that is used on the command line using the `--cmap` option to +`topostats process`. This will over-ride both the default and any value specified in a custom configuration you may have +set. The following sets this to `rainbow`. + +```bash +topostats process --cmap rainbow +``` + +#### Saved Image format + +Matplotlib, and by extension TopoStats, supports saving images in a range of different formats including `png` +([Portable Network Graphics](https://en.wikipedia.org/wiki/PNG)), `svg` ([Scalable Vector +Graphics](https://en.wikipedia.org/wiki/SVG)) and `pdf` ([Portable Document +Format](https://en.wikipedia.org/wiki/PDF)). The default is `png` but, as with both DPI and Colormap, this can be +easily changed via a custom configuration file or command line options without having to edit +the [Matplotlib Style file](matplotlib-style). + +If you want to modify the output file format that is used you have two options. Firstly you can [generate a +configuration](#generating-a-configuration) file and modify the field `savefig_format` to your choice. The example below +shows changing this from `null` (which defaults to `png` as defined in `topostats.mplstyle`) to `svg`. + +```yaml +plotting: + ... + savefig_format: svg # Options : null (defaults to png) or see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html +``` + +Alternatively it is possible to specify the output image format that is used on the command line using the +`--savefig-format` option to `topostats process`. This will over-ride both the default and any value specified in a +custom configuration you may have set. 
The following sets this to `svg`. + +```bash +topostats process --savefig-format svg +``` + +**NB** Note that these options are not mutually exclusive and can therefore be combined along with any of the other +options available to `topostats process`. The following would use a DPI of `400`, set the colormap to `rainbow` and the +output format to `svg` when running Topostats and would over-ride options in any custom configuration file or matplotlib +style file. + +```bash +topostats process --savefig-dpi 400 --cmap rainbow --savefig-format svg +``` + [^1] When writing file paths you can use absolute or relative paths. On Windows systems absolute paths start with the drive letter (e.g. `c:/`) on Linux and OSX systems they start with `/`. Relative paths are started either with a `./` which denotes the current directory or one or more `../` which means the higher level directory from the current diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 8cfe848b189..e3174419f15 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -1,4 +1,5 @@ """Tests for the plotting module.""" + import importlib.resources as pkg_resources from pathlib import Path @@ -58,7 +59,7 @@ def test_toposum_class(toposum_object_multiple_directories: TopoSum) -> None: assert isinstance(toposum_object_multiple_directories.image_id, str) assert isinstance(toposum_object_multiple_directories.hist, bool) assert isinstance(toposum_object_multiple_directories.kde, bool) - assert isinstance(toposum_object_multiple_directories.file_ext, str) + assert isinstance(toposum_object_multiple_directories.savefig_format, str) assert isinstance(toposum_object_multiple_directories.output_dir, Path) assert isinstance(toposum_object_multiple_directories.var_to_label, dict) diff --git a/tests/test_plottingfuncs.py b/tests/test_plottingfuncs.py index 696da0f8ff9..7150f5bca08 100644 --- a/tests/test_plottingfuncs.py +++ b/tests/test_plottingfuncs.py @@ -1,4 +1,5 @@ """Tests of plotting 
functions.""" + from pathlib import Path import matplotlib as mpl @@ -282,7 +283,7 @@ def test_mask_cmap(plotting_config: dict, tmp_path: Path) -> None: @pytest.mark.mpl_image_compare(baseline_dir="resources/img/", savefig_kwargs={"dpi": DPI}) def test_high_dpi(minicircle_grain_gaussian_filter: Grains, plotting_config: dict, tmp_path: Path) -> None: """Test plotting with high DPI.""" - plotting_config["dpi"] = DPI + plotting_config["savefig_dpi"] = DPI fig, _ = Images( data=minicircle_grain_gaussian_filter.images["gaussian_filtered"], output_dir=tmp_path, diff --git a/tests/test_processing.py b/tests/test_processing.py index 0acc0f129e0..f639c3d92ea 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -1,4 +1,5 @@ """Test end-to-end running of topostats.""" + from pathlib import Path import filetype @@ -108,7 +109,7 @@ def test_save_cropped_grains( """Tests if cropped grains are saved only when image set is 'all' rather than 'core'.""" process_scan_config["plotting"]["image_set"] = image_set process_scan_config["plotting"] = update_plotting_config(process_scan_config["plotting"]) - process_scan_config["plotting"]["dpi"] = 50 + process_scan_config["plotting"]["savefig_dpi"] = 50 img_dic = load_scan_data.img_dict _, _, _ = process_scan( @@ -152,7 +153,7 @@ def test_save_cropped_grains( def test_save_format(process_scan_config: dict, load_scan_data: LoadScans, tmp_path: Path, extension: str): """Tests if save format applied to cropped images.""" process_scan_config["plotting"]["image_set"] = "all" - process_scan_config["plotting"]["save_format"] = extension + process_scan_config["plotting"]["savefig_format"] = extension process_scan_config["plotting"] = update_plotting_config(process_scan_config["plotting"]) img_dic = load_scan_data.img_dict diff --git a/topostats/default_config.yaml b/topostats/default_config.yaml index 8dc19d1528e..d66a531adca 100644 --- a/topostats/default_config.yaml +++ b/topostats/default_config.yaml @@ -61,13 +61,15 @@ 
dnatracing: plotting: run: true # Options : true, false style: topostats.mplstyle # Options : topostats.mplstyle or path to a matplotlibrc params file - save_format: png # Options : see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html + savefig_format: null # Options : null (defaults to png) or see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html + savefig_dpi: null # Options : null (defaults to format) see https://afm-spm.github.io/TopoStats/main/configuration.html#further-customisation and https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html pixel_interpolation: null # Options : https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html image_set: core # Options : all, core zrange: [null, null] # low and high height range for core images (can take [null, null]). low <= high colorbar: true # Options : true, false axes: true # Options : true, false (due to off being a bool when parsed) num_ticks: [null, null] # Number of ticks to have along the x and y axes. Options : null (auto) or integer > 1 + cmap: null # Colormap/colourmap to use (default is 'nanoscope' which is used if null, other options are 'afmhot', 'viridis' etc.) mask_cmap: blu # Options : blu, jet_r and any in matplotlib histogram_log_axis: false # Options : true, false summary_stats: diff --git a/topostats/entry_point.py b/topostats/entry_point.py index a6ccea5c6b8..453fc937e92 100644 --- a/topostats/entry_point.py +++ b/topostats/entry_point.py @@ -2,6 +2,7 @@ Parses command-line arguments and passes input on to the relevant functions / modules. 
""" + import argparse as arg import sys @@ -10,6 +11,8 @@ from topostats.plotting import run_toposum from topostats.run_topostats import run_topostats +# pylint: disable=too-many-statements + def create_parser() -> arg.ArgumentParser: """Create a parser for reading options.""" @@ -29,8 +32,8 @@ def create_parser() -> arg.ArgumentParser: # Create a sub-parsers for different stages of processing and tasks process_parser = subparsers.add_parser( "process", - description="Process AFM images. Additional arguments over-ride those in the configuration file.", - help="Process AFM images. Additional arguments over-ride those in the configuration file.", + description="Process AFM images. Additional arguments over-ride defaults or those in the configuration file.", + help="Process AFM images. Additional arguments over-ride defaults or those in the configuration file.", ) process_parser.add_argument( "-c", @@ -106,6 +109,27 @@ def create_parser() -> arg.ArgumentParser: required=False, help="Whether to save plots.", ) + process_parser.add_argument( + "--savefig-format", + dest="savefig_format", + type=str, + required=False, + help="Format for saving figures to, options are 'png', 'svg', or other valid Matplotlib supported formats.", + ) + process_parser.add_argument( + "--savefig-dpi", + dest="savefig_dpi", + type=int, + required=False, + help="Dots Per Inch for plots, should be integer for dots per inch.", + ) + process_parser.add_argument( + "--cmap", + dest="cmap", + type=str, + required=False, + help="Colormap to use, options include 'nanoscope', 'afmhot' and any valid Matplotlib colormap.", + ) process_parser.add_argument("-m", "--mask", dest="mask", type=bool, required=False, help="Mask the image.") process_parser.add_argument( "-w", @@ -151,6 +175,13 @@ def create_parser() -> arg.ArgumentParser: required=False, help="Filename to write a sample YAML label file to (should end in '.yaml').", ) + toposum_parser.add_argument( + "--savefig-format", + dest="savefig_format", + 
type=str, + required=False, + help="Format for saving figures to, options are 'png', 'svg', or other valid Matplotlib supported formats.", + ) toposum_parser.set_defaults(func=run_toposum) load_parser = subparsers.add_parser( diff --git a/topostats/plotting.py b/topostats/plotting.py index b65ba7f9b8e..6a5746b8357 100644 --- a/topostats/plotting.py +++ b/topostats/plotting.py @@ -1,4 +1,5 @@ """Plotting and summary of TopoStats output statistics.""" + from collections import defaultdict import importlib.resources as pkg_resources @@ -43,7 +44,7 @@ def __init__( figsize: tuple = (16, 9), alpha: float = 0.5, palette: str = "deep", - file_ext: str = "png", + savefig_format: str = "png", output_dir: Union[str, Path] = ".", var_to_label: dict = None, hue: str = "basename", @@ -105,7 +106,7 @@ def __init__( self.figsize = figsize self.alpha = alpha self.palette = palette - self.file_ext = file_ext + self.savefig_format = savefig_format self.output_dir = Path(output_dir) self.output_dir.mkdir(parents=True, exist_ok=True) self.var_to_label = var_to_label @@ -264,9 +265,10 @@ def save_plot(self, outfile: Path) -> None: outfile: str Output file name to save figure to. """ - plt.savefig(self.output_dir / f"{outfile}.{self.file_ext}") + plt.savefig(self.output_dir / f"{outfile}.{self.savefig_format}") LOGGER.info( - f"[plotting] Plotted {self.stat_to_sum} to : " f"{str(self.output_dir / f'{outfile}.{self.file_ext}')}" + f"[plotting] Plotted {self.stat_to_sum} to : " + f"{str(self.output_dir / f'{outfile}.{self.savefig_format}')}" ) def _set_label(self, var: str): diff --git a/topostats/plotting_dictionary.yaml b/topostats/plotting_dictionary.yaml index b88e6ff072e..e7b02118805 100644 --- a/topostats/plotting_dictionary.yaml +++ b/topostats/plotting_dictionary.yaml @@ -9,192 +9,192 @@ # | filename | String | Filename (minus extension) to which image is saved. 
| # | title | String | Title for the plot | # | image_type | String | Whether the plot includes the height (non-binary) or the outline (binary) | -# | dpi | int | Dots Per Inch for plotting | +# | savefig_dpi | int | Dots Per Inch for plotting | # | core_set | Boolean | Whether a plot is considered part of the core set of images that are plotted.| extracted_channel: filename: "00-raw_heightmap" title: "Raw Height" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false pixels: filename: "01-pixels" title: "Pixels" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false initial_median_flatten: filename: "02-initial_median_flatten_unmasked" title: "Initial Alignment (Unmasked)" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false initial_tilt_removal: filename: "03-initial_tilt_removal_unmasked" title: "Initial Tilt Removal (Unmasked)" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false initial_quadratic_removal: filename: "04-initial_quadratic_removal_unmasked" title: "Initial Quadratic Removal (Unmasked)" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false initial_nonlinear_polynomial_removal: filename: "05-nonlinear_polynomial_removal_unmasked" title: "Nonlinear polynomial removal (Unmasked)" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false initial_scar_removal: filename: "06-initial_scar_removal" title: "Scar removal" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false initial_zero_average_background: filename: "7-initial_zero_average_background" title: "Initial Zero Averaged Background" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false mask: filename: "08-binary_mask" title: "Binary Mask" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false masked_median_flatten: filename: "09-secondary_align_masked" title: "Secondary Alignment (Masked)" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: 
false masked_tilt_removal: filename: "10-secondary_tilt_removal_masked" title: "Secondary Tilt Removal (Masked)" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false masked_quadratic_removal: filename: "11-quadratic_removal_masked" title: "Secondary Quadratic Removal" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false masked_nonlinear_polynomial_removal: filename: "12-nonlinear_polynomial_removal_masked" title: "Nonlinear polynomial removal masked" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false secondary_scar_removal: filename: "13-scar_removal" title: "Secondary scar removal" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false scar_mask: filename: "14-scar_mask" title: "Scar mask" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false final_zero_average_background: filename: "15-final_zero_average_background" title: "Final Zero Averaged Background" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false gaussian_filtered: filename: "16-gaussian_filtered" title: "Gaussian Filtered" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false z_threshed: title: "Height Thresholded" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: true mask_grains: filename: "17-mask_grains" title: "Mask for Grains" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false labelled_regions_01: filename: "18-labelled_regions" title: "Labelled Regions" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false tidied_border: filename: "19-tidy_borders" title: "Tidied Borders" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false removed_noise: filename: "20-noise_removed" title: "Noise removed" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false removed_small_objects: filename: "21-small_objects_removed" title: "Small Objects Removed" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false 
mask_overlay: title: "Masked Objects" image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: true labelled_regions_02: filename: "22-labelled_regions" title: "Labelled Regions" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false coloured_regions: filename: "23-coloured_regions" title: "Coloured Regions" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false bounding_boxes: filename: "24-bounding_boxes" title: "Bounding Boxes" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false coloured_boxes: filename: "25-labelled_image_bboxes" title: "Labelled Image with Bounding Boxes" image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false all_molecule_traces: title: "Molecule Traces" image_type: "non-binary" - dpi: 800 + savefig_dpi: 800 core_set: true grain_image: image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false grain_mask: image_type: "binary" - dpi: 100 + savefig_dpi: 100 core_set: false grain_mask_image: image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false single_molecule_trace: image_type: "non-binary" - dpi: 100 + savefig_dpi: 100 core_set: false diff --git a/topostats/plottingfuncs.py b/topostats/plottingfuncs.py index c585b5c1242..c4773701bb8 100644 --- a/topostats/plottingfuncs.py +++ b/topostats/plottingfuncs.py @@ -1,4 +1,5 @@ """Plotting data.""" + from __future__ import annotations import importlib.resources as pkg_resources @@ -119,10 +120,10 @@ def __init__( axes: bool = True, num_ticks: list[int | None, int | None] = (None, None), save: bool = True, - save_format: str = None, + savefig_format: str | None = None, histogram_log_axis: bool = True, histogram_bins: int | None = None, - dpi: str | float | None = None, + savefig_dpi: str | float | None = None, ) -> None: """ Initialise the class. @@ -173,7 +174,7 @@ def __init__( Optionally use a logarithmic y axis for the histogram plots. histogram_bin: int Number of bins for histograms to use. 
- dpi: Union[str, float] + savefig_dpi: Union[str, float] The resolution of the saved plot (default 'figure'). """ if style is None: @@ -200,10 +201,10 @@ def __init__( self.axes = axes self.num_ticks = num_ticks self.save = save - self.save_format = mpl.rcParams["savefig.format"] if save_format is None else save_format + self.savefig_format = mpl.rcParams["savefig.format"] if savefig_format is None else savefig_format self.histogram_log_axis = histogram_log_axis self.histogram_bins = mpl.rcParams["hist.bins"] if histogram_bins is None else histogram_bins - self.dpi = mpl.rcParams["savefig.dpi"] if dpi is None else dpi + self.savefig_dpi = mpl.rcParams["savefig.dpi"] if savefig_dpi is None else savefig_dpi def plot_histogram_and_save(self): """ @@ -227,10 +228,10 @@ def plot_histogram_and_save(self): ax.set_ylabel("frequency in image") plt.title(self.title) plt.savefig( - (self.output_dir / f"{self.filename}_histogram.{self.save_format}"), + (self.output_dir / f"{self.filename}_histogram.{self.savefig_format}"), bbox_inches="tight", pad_inches=0.5, - dpi=self.dpi, + dpi=self.savefig_dpi, ) plt.close() @@ -259,8 +260,8 @@ def plot_and_save(self): else: self.save_array_figure() LOGGER.info( - f"[{self.filename}] : Image saved to : {str(self.output_dir / self.filename)}.{self.save_format}\ - | DPI: {self.dpi}" + f"[{self.filename}] : Image saved to : {str(self.output_dir / self.filename)}.{self.savefig_format}\ + | DPI: {self.savefig_dpi}" ) return fig, ax @@ -325,13 +326,13 @@ def save_figure(self): plt.title("") fig.frameon = False plt.savefig( - (self.output_dir / f"{self.filename}.{self.save_format}"), + (self.output_dir / f"{self.filename}.{self.savefig_format}"), bbox_inches="tight", pad_inches=0, - dpi=self.dpi, + dpi=self.savefig_dpi, ) else: - plt.savefig((self.output_dir / f"{self.filename}.{self.save_format}"), dpi=self.dpi) + plt.savefig((self.output_dir / f"{self.filename}.{self.savefig_format}"), dpi=self.savefig_dpi) else: plt.xlabel("Nanometres") 
plt.ylabel("Nanometres") @@ -347,12 +348,12 @@ def save_figure(self): def save_array_figure(self) -> None: """Save the image array as an image using plt.imsave().""" plt.imsave( - (self.output_dir / f"{self.filename}.{self.save_format}"), + (self.output_dir / f"{self.filename}.{self.savefig_format}"), self.data, cmap=self.cmap, vmin=self.zrange[0], vmax=self.zrange[1], - format=self.save_format, + format=self.savefig_format, ) plt.close() diff --git a/topostats/processing.py b/topostats/processing.py index 1cf82c7403e..434a94626e4 100644 --- a/topostats/processing.py +++ b/topostats/processing.py @@ -1,4 +1,5 @@ """Functions for processing data.""" + from __future__ import annotations from collections import defaultdict @@ -718,9 +719,12 @@ def completion_message(config: dict, img_files: list, summary_config: dict, imag f" File Extension : {config['file_ext']}\n" f" Files Found : {len(img_files)}\n" f" Successfully Processed^1 : {images_processed} ({(images_processed * 100) / len(img_files)}%)\n" - f" Configuration : {config['output_dir']}/config.yaml\n" f" All statistics : {str(config['output_dir'])}/all_statistics.csv\n" f" Distribution Plots : {distribution_plots_message}\n\n" + f" Configuration : {config['output_dir']}/config.yaml\n" + f" DPI : {config['plotting']['savefig_dpi']}\n" + f" Output image format : {config['plotting']['savefig_format']}\n" + f" Colormap : {config['plotting']['cmap']}\n\n" f" Email : topostats@sheffield.ac.uk\n" f" Documentation : https://afm-spm.github.io/topostats/\n" f" Source Code : https://github.com/AFM-SPM/TopoStats/\n" diff --git a/topostats/run_topostats.py b/topostats/run_topostats.py index c5ade451c51..ecb5d6cbe10 100644 --- a/topostats/run_topostats.py +++ b/topostats/run_topostats.py @@ -2,6 +2,7 @@ This provides an entry point for running TopoStats as a command line programme. 
""" + import importlib.resources as pkg_resources import logging import sys @@ -67,12 +68,13 @@ def run_topostats(args=None): # noqa: C901 # Create base output directory config["output_dir"].mkdir(parents=True, exist_ok=True) - # Load plotting_dictionary and validate + # Load plotting_dictionary and validate then update with command line options plotting_dictionary = pkg_resources.open_text(__package__, "plotting_dictionary.yaml") config["plotting"]["plot_dict"] = yaml.safe_load(plotting_dictionary.read()) validate_config( config["plotting"]["plot_dict"], schema=PLOTTING_SCHEMA, config_type="YAML plotting configuration file" ) + config["plotting"] = update_config(config["plotting"], args) # Check earlier stages of processing are enabled for later. check_run_steps( @@ -81,8 +83,9 @@ def run_topostats(args=None): # noqa: C901 grainstats_run=config["grainstats"]["run"], dnatracing_run=config["dnatracing"]["run"], ) - # Update the config["plotting"]["plot_dict"] with plotting options + # Ensures each image has all plotting options which are passed as **kwargs config["plotting"] = update_plotting_config(config["plotting"]) + LOGGER.debug(f"Plotting configuration after update :\n{pformat(config['plotting'], indent=4)}") LOGGER.info(f"Configuration file loaded from : {args.config_file}") LOGGER.info(f"Scanning for images in : {config['base_dir']}") @@ -94,8 +97,11 @@ def run_topostats(args=None): # noqa: C901 LOGGER.error(f"No images with extension {config['file_ext']} in {config['base_dir']}") LOGGER.error("Please check your configuration and directories.") sys.exit() - LOGGER.info(f'Thresholding method (Filtering) : {config["filter"]["threshold_method"]}') - LOGGER.info(f'Thresholding method (Grains) : {config["grains"]["threshold_method"]}') + LOGGER.info(f"Thresholding method (Filtering) : {config['filter']['threshold_method']}") + LOGGER.info(f"Thresholding method (Grains) : {config['grains']['threshold_method']}") + LOGGER.info(f"DPI : 
{config['plotting']['savefig_dpi']}") + LOGGER.info(f"Output image format : {config['plotting']['savefig_format']}") + LOGGER.info(f"Colormap : {config['plotting']['cmap']}") LOGGER.debug(f"Configuration after update : \n{pformat(config, indent=4)}") # noqa : T203 processing_function = partial( @@ -161,7 +167,7 @@ def run_topostats(args=None): # noqa: C901 summary_config = yaml.safe_load(summary_yaml.read()) # Do not pass command line arguments to toposum as they clash with process command line arguments - summary_config = update_config(summary_config, {}) + summary_config = update_config(summary_config, config["plotting"]) validate_config(summary_config, SUMMARY_SCHEMA, config_type="YAML summarisation config") # We never want to load data from CSV as we are using the data that has just been processed. diff --git a/topostats/summary_config.yaml b/topostats/summary_config.yaml index 0ac5d9601f7..03bd538954d 100644 --- a/topostats/summary_config.yaml +++ b/topostats/summary_config.yaml @@ -1,7 +1,7 @@ base_dir: ./ # Directory from which all files and directories are relative to ("./" is the default current directory) output_dir: ./output/summary_distributions csv_file: ./all_statistics.csv -file_ext: png +savefig_format: png pickle_plots: True # Save plots to a Python pickle var_to_label: null # Optional YAML file that maps variable names to labels, uses topostats/var_to_label.yaml if null molecule_id: molecule_number diff --git a/topostats/topostats.mplstyle b/topostats/topostats.mplstyle index cb89bc1b69a..c14f45baa42 100644 --- a/topostats/topostats.mplstyle +++ b/topostats/topostats.mplstyle @@ -1,21 +1,22 @@ #### MATPLOTLIBRC FORMAT -## NOTE FOR END USERS: DO NOT EDIT THIS FILE! +## This is a Matplotlib configuration file for TopoStats. ## -## This is the Matplotlib configuration file for TopoStats - you can find a copy -## of it on your system in site-packages/TopoStats/topostats/images.mplstyle -## (relative to your Python installation location). 
-## DO NOT EDIT IT! +## You have probably made a copy of this using 'topostats create-matplotlibrc' +## or a variant there of. If so you are free to edit this file, if you haven't +## done this please do so and edit the copy you create. For options on creating +## copies of this file see +## +## topostats create-matplotlibrc --help ## ## Fields that have been customised for TopoStats are uncommented, others are the ## default Matplotlib values. ## -## If you wish to change your default style, copy this file to your work directory -## you MUST rename it to something _other_ than topostats.mplstyle. Edit fields as -## required and include it by either editing the style field in config.yaml to point -## to the desired file or with the command line option --matplotlibrc -## +## Once you have saved changes to your copy you can run topostats with it using +## the --matplotlibrc option. If for example your copy is 'my_custom.mplstyle' +## you would use it with the following ## +## topostats process --matplotlibrc my_custom.mplstyle ## ## See https://matplotlib.org/stable/users/explain/customizing.html#customizing-with-matplotlibrc-files ## for more details on the paths which are checked for the configuration file. @@ -603,7 +604,7 @@ figure.figsize: 8.0, 8.0 # figure size in inches ## *************************************************************************** #image.aspect: equal # {equal, auto} or a number #image.interpolation: antialiased # see help(imshow) for options -image.cmap: nanoscope # A colormap name (nanoscope, afmhot, viridis etc.) +image.cmap: nanoscope # A colormap name (nanoscope, afmhot, viridis etc.) 
#image.lut: 256 # the size of the colormap lookup table #image.origin: upper # {lower, upper} #image.resample: True diff --git a/topostats/utils.py b/topostats/utils.py index b1726e2aace..964f2a96679 100644 --- a/topostats/utils.py +++ b/topostats/utils.py @@ -1,4 +1,5 @@ """Utilities.""" + from __future__ import annotations import logging diff --git a/topostats/validation.py b/topostats/validation.py index 80e5953299e..73810e032b4 100644 --- a/topostats/validation.py +++ b/topostats/validation.py @@ -1,4 +1,5 @@ """Validation of configuration.""" + import logging import os from pathlib import Path @@ -229,7 +230,17 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: error="Invalid value in config for 'plotting.style', valid values are 'topostats.mplstyle' or None", ), ), - "save_format": str, + "savefig_format": Or( + None, + str, + error="Invalid value in config for plotting.savefig_format" "must be a value supported by Matplotlib.", + ), + "savefig_dpi": Or( + None, + "figure", + int, + error="Invalid value in config for plotting.savefig_dpi, valid" "values are 'figure' or integers", + ), "image_set": Or( "all", "core", @@ -271,6 +282,13 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: [None, And(int, lambda n: n > 1)], error="Invalid value in config plotting.for 'num_ticks', valid values are 'null' or integers > 1", ), + "cmap": Or( + None, + "afmhot", + "nanoscope", + "gwyddion", + error="Invalid value in config for 'plotting.cmap', valid values are 'afmhot', 'nanoscope' or 'gwyddion'", + ), "mask_cmap": str, "histogram_log_axis": Or( True, @@ -314,7 +332,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -329,7 +347,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: 
error="Invalid value in config 'pixels.image_type', valid values are 'binary' or 'non-binary'", ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -347,7 +365,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -365,7 +383,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -383,7 +401,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -401,7 +419,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -419,7 +437,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -437,7 +455,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -452,7 +470,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: error="Invalid value in config 'mask.image_type', valid values are 'binary' or 
'non-binary'", ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -470,7 +488,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -488,7 +506,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -506,7 +524,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -524,7 +542,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -542,7 +560,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -560,7 +578,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -578,7 +596,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ 
-596,7 +614,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -612,7 +630,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": True, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -629,7 +647,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -647,7 +665,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -664,7 +682,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -681,7 +699,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -699,7 +717,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -715,7 +733,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": True, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", 
error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -733,7 +751,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -751,7 +769,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -768,7 +786,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -785,7 +803,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -802,7 +820,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -817,7 +835,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": False, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -830,7 +848,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: error=("Invalid value in config 'grain_mask.image_type', valid values " "are 'binary' or 'non-binary'"), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 
0.", @@ -846,7 +864,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -862,7 +880,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), ), "core_set": bool, - "dpi": Or( + "savefig_dpi": Or( lambda n: n > 0, "figure", error="Invalid value in config for 'dpi', valid values are 'figure' or > 0.", @@ -876,11 +894,11 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: "base_dir": Path, "output_dir": Path, "csv_file": str, - "file_ext": Or( + "savefig_format": Or( "png", "pdf", "svg", - error=("Invalid value in config 'file_ext', valid values are 'png', 'pdf' or 'svg' "), + error=("Invalid value in config 'savefig_format', valid values are 'png', 'pdf' or 'svg' "), ), "pickle_plots": Or( True, From 8bae05c9cd60731b937f96786da4365bc28d4c70 Mon Sep 17 00:00:00 2001 From: Neil Shephard Date: Fri, 12 Jan 2024 12:01:12 +0000 Subject: [PATCH 2/8] Restore cmap to config; simpler config of cmap/dpi/image format Closes #776 + Aligns command line and configuration field names with those in matplotlibrc files. + Restores the `cmap` configuration option to `default_config.yaml` and introduces `savefig_dpi` option. + Adds command line options for setting DPI (`--savefig-dpi`), Colormap (`--cmap`)and Output file format (`--savefig-format`). + Expands documentation on how to use custom configuration files or command line options to set the DPI/Colormap/Output format. + Updates the header to `topostats.mplstyle` to explain how to use it as typically users will have created a copy of the file (after the convenience function `topostats create-matplotlibrc` was introduced with #773). 
+ To achieve this the dictionary `config["plotting"]` needed explicitly updating as the `update_config()` function doesn't update nested configurations (since this is the first PR that introduces command line options that modify any of the values in the nested dictionaries). + Updates options for `topostats toposum` to align with `savefig_format` and adds a flag to the entry point so output format is consistent. + Updates and expands the configuration documentation explaining how to use these conveniences. As a consequence quite a few files are touched to ensure that validation and processing functions all have variables that align with those in the configuration. If users could test this it would be very much appreciated. If you use the Git installed version, something like the following would switch branches and allow you to test it. ``` conda create --name topostats-config # Create and activate a virtual env specific to this conda activate topostats-config cd ~/path/to/TopoStats git pull git checkout ns-rse/776-config-jigging pip install -e . topostats process --output-dir base topostats create-config test_config.yaml # Create test_config.yaml to try changing parameters topostats process --config test_config.yaml --output-dir test1 topostats process --output-dir test2 --savefig-dpi 10 --cmap rainbow --savefig-format svg topostats process --config test_config.yaml --output-dir test3 --savefig-dpi 80 --cmap viridis --savefig-format pdf ``` Each invocation of `topostats process` will save output to its own directory (one of `base`, `test1`, `test2` or `test3`) for comparison. There should be differences between the output in `base` and the output saved under `test1` (which uses the values in `test_config.yaml`), and the output under `test2` and `test3` should also differ. I would really appreciate feedback on the documentation, as without clear documentation it is perhaps confusing how the components interact, work and can be modified, and getting this as clear as possible will be really helpful.
--- docs/configuration.md | 2 ++ topostats/default_config.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 97c266c3070..fd6abbd06f2 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -185,6 +185,8 @@ z_threshed: core_set: true ``` +Whilst it is possible to edit this file it is not recommended to do so. + The following section describes how to override the DPI settings defined in this file and change the global `cmap` (colormap/colourmap) used in plotting and output format. diff --git a/topostats/default_config.yaml b/topostats/default_config.yaml index d66a531adca..0a078da0448 100644 --- a/topostats/default_config.yaml +++ b/topostats/default_config.yaml @@ -62,7 +62,7 @@ plotting: run: true # Options : true, false style: topostats.mplstyle # Options : topostats.mplstyle or path to a matplotlibrc params file savefig_format: null # Options : null (defaults to png) or see https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html - savefig_dpi: null # Options : null (defaults to format) see https://afm-spm.github.io/TopoStats/main/configuration.html#further-customisation and https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html + savefig_dpi: null # Options : null (defaults to figure) see https://afm-spm.github.io/TopoStats/main/configuration.html#further-customisation and https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html pixel_interpolation: null # Options : https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html image_set: core # Options : all, core zrange: [null, null] # low and high height range for core images (can take [null, null]). 
low <= high From 97ecb2cea3443fd6d5b9a6767b068371d7b8a470 Mon Sep 17 00:00:00 2001 From: Neil Shephard Date: Mon, 22 Jan 2024 11:52:17 +0000 Subject: [PATCH 3/8] Update docs/configuration.md Co-authored-by: Max Gamill <91465918+MaxGamill-Sheffield@users.noreply.github.com> --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index fd6abbd06f2..21287a900b4 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -94,7 +94,7 @@ Aside from the comments in YAML file itself the fields are described below. | `plotting` | `run` | boolean | `true` | Whether to run plotting. Options : `true`, `false` | | | `style` | str | `topostats.mplstyle` | The default loads a custom [matplotlibrc param file](https://matplotlib.org/stable/users/explain/customizing.html#the-matplotlibrc-file) that comes with TopoStats. Users can specify the path to their own style file as an alternative. | | | `save_format` | string | `null` | Format to save images in, `null` defaults to `png` see [matplotlib.pyplot.savefig](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html) | -| | `savefig_dpi` | string / float | `null` | Dots Per Inch (DPI), if `null` then the value `figure` is used, for other values (typically integers) see [#further-customisation] and [Matplotlib](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html). | +| | `savefig_dpi` | string / float | `null` | Dots Per Inch (DPI), if `null` then the value `figure` is used, for other values (typically integers) see [#further-customisation] and [Matplotlib](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html). Low DPI's improve processing time but can reduce the plotted trace (but not the actual trace) accuracy. | | | `pixel_interpolation` | string | `null` | Interpolation method for image plots. Recommended default 'null' prevents banding that occurs in some images. 
If interpolation is needed, we recommend `gaussian`. See [matplotlib imshow interpolations documentation](https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html) for details. | | | `image_set` | string | `all` | Which images to plot. Options : `all`, `core` | | | `zrange` | list | `[0, 3]` | Low (first number) and high (second number) height range for core images (can take [null, null]). **NB** `low <= high` otherwise you will see a `ValueError: minvalue must be less than or equal to maxvalue` error. | From b3ff565382a0e70d0df45632bb32d31f7ce6c9a2 Mon Sep 17 00:00:00 2001 From: Neil Shephard Date: Mon, 22 Jan 2024 11:52:33 +0000 Subject: [PATCH 4/8] Update topostats/validation.py Co-authored-by: Max Gamill <91465918+MaxGamill-Sheffield@users.noreply.github.com> --- topostats/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topostats/validation.py b/topostats/validation.py index 73810e032b4..bf2a6e8bc95 100644 --- a/topostats/validation.py +++ b/topostats/validation.py @@ -238,7 +238,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: "savefig_dpi": Or( None, "figure", - int, + lambda n: n > 0, error="Invalid value in config for plotting.savefig_dpi, valid" "values are 'figure' or integers", ), "image_set": Or( From f22eeec97b754213ea0c65d8091a452631d7badd Mon Sep 17 00:00:00 2001 From: Neil Shephard Date: Mon, 22 Jan 2024 11:52:49 +0000 Subject: [PATCH 5/8] Update topostats/topostats.mplstyle Co-authored-by: Max Gamill <91465918+MaxGamill-Sheffield@users.noreply.github.com> --- topostats/topostats.mplstyle | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/topostats/topostats.mplstyle b/topostats/topostats.mplstyle index c14f45baa42..81bf60010a9 100644 --- a/topostats/topostats.mplstyle +++ b/topostats/topostats.mplstyle @@ -13,8 +13,9 @@ ## default Matplotlib values. 
## ## Once you have saved changes to your copy you can run topostats with it using -## the --matplotlibrc option. If for example your copy is 'my_custom.mplstyle' -## you would use it with the following +## the --matplotlibrc option or updating the sytle sheet path in the plotting section +## of the configuration file. If for example your copy is 'my_custom.mplstyle' you +## would use it with the following ## ## topostats process --matplotlibrc my_custom.mplstyle ## From 0d6d37e463f4b5cff093df4f1e7e58664787c715 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jan 2024 11:53:13 +0000 Subject: [PATCH 6/8] [pre-commit.ci] Fixing issues with pre-commit --- docs/configuration.md | 104 +++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 21287a900b4..4adf202858f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -54,58 +54,58 @@ above: Aside from the comments in YAML file itself the fields are described below. -| Section | Sub-Section | Data Type | Default | Description | -| :-------------- | :-------------------------------- | :------------- | :-------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `base_dir` | | string | `./` | Directory to recursively search for files within.[^1] | -| `output_dir` | | string | `./output` | Directory that output should be saved to.[^1] | -| `log_level` | | string | `info` | Verbosity of logging, options are (in increasing order) `warning`, `error`, `info`, `debug`. | -| `cores` | | integer | `2` | Number of cores to run parallel processes on. 
| -| `file_ext` | | string | `.spm` | File extensions to search for. | -| `loading` | `channel` | string | `Height` | The channel of data to be processed, what this is will depend on the file-format you are processing and the channel you wish to process. | -| `filter` | `run` | boolean | `true` | Whether to run the filtering stage, without this other stages won't run so leave as `true`. | -| | `threshold_method` | str | `std_dev` | Threshold method for filtering, options are `ostu`, `std_dev` or `absolute`. | -| | `otsu_threshold_multiplier` | float | `1.0` | Factor by which the derived Otsu Threshold should be scaled. | -| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | -| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first) and above (second) absolute threshold for separating data from the image background. | -| | `gaussian_size` | float | `0.5` | The number of standard deviations to build the Gaussian kernel and thus affects the degree of blurring. See [skimage.filters.gaussian](https://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.gaussian) and `sigma` for more information. | -| | `gaussian_mode` | string | `nearest` | | -| `grains` | `run` | boolean | `true` | Whether to run grain finding. Options `true`, `false` | -| | `row_alignment_quantile` | float | `0.5` | Quantile (0.0 to 1.0) to be used to determine the average background for the image. below values may improve flattening of large features. | -| | `smallest_grain_size_nm2` | int | `100` | The smallest size of grains to be included (in nm^2), anything smaller than this is considered noise and removed. **NB** must be `> 0.0`. 
| -| | `threshold_method` | float | `std_dev` | Threshold method for grain finding. Options : `otsu`, `std_dev`, `absolute` | -| | `otsu_threshold_multiplier` | | `1.0` | Factor by which the derived Otsu Threshold should be scaled. | -| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | -| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first), above (second) absolute threshold for separating grains from the image background. | -| | `direction` | | `above` | Defines whether to look for grains above or below thresholds or both. Options: `above`, `below`, `both` | -| | `smallest_grain_size` | int | `50` | Catch-all value for the minimum size of grains. Measured in nanometres squared. All grains with area below than this value are removed. | -| | `absolute_area_threshold` | dictionary | `[300, 3000], [null, null]` | Area thresholds for above the image background (first) and below the image background (second), which grain sizes are permitted, measured in nanometres squared. All grains outside this area range are removed. | -| | `remove_edge_intersecting_grains` | boolean | `true` | Whether to remove grains that intersect the image border. _Do not change this unless you know what you are doing_. This will ruin any statistics relating to grain size, shape and DNA traces. | -| `grainstats` | `run` | boolean | `true` | Whether to calculate grain statistics. 
Options : `true`, `false` | -| | `cropped_size` | float | `40.0` | Force cropping of grains to this length (in nm) of square cropped images (can take `-1` for grain-sized box) | -| | `edge_detection_method` | str | `binary_erosion` | Type of edge detection method to use when determining the edges of grain masks before calculating statistics on them. Options : `binary_erosion`, `canny`. | -| `dnatracing` | `run` | boolean | `true` | Whether to run DNA Tracing. Options : true, false | -| | `min_skeleton_size` | int | `10` | The minimum number of pixels a skeleton should be for statistics to be calculated on it. Anything smaller than this is dropped but grain statistics are retained. | -| | `skeletonisation_method` | str | `topostats` | Skeletonisation method to use, possible options are `zhang`, `lee`, `thin` (from [Scikit-image Morphology module](https://scikit-image.org/docs/stable/api/skimage.morphology.html)) or the original bespoke TopoStas method `topostats`. | -| | `spline_step_size` | float | `7.0e-9` | The sampling rate of the spline in metres. This is the frequency at which points are sampled from fitted traces to act as guide points for the splining process using scipy's splprep. | -| | `spline_linear_smoothing` | float | `5.0` | The amount of smoothing to apply to splines of linear molecule traces. | -| | `spline_circular_smoothing` | float | `0.0` | The amount of smoothing to apply to splines of circular molecule traces. | -| | `pad_width` | int | 10 | Padding for individual grains when tracing. This is sometimes required if the bounding box around grains is too tight and they touch the edge of the image. | -| | `cores` | int | 1 | Number of cores to use for tracing. **NB** Currently this is NOT used and should be left commented in the YAML file. | -| `plotting` | `run` | boolean | `true` | Whether to run plotting. 
Options : `true`, `false` | -| | `style` | str | `topostats.mplstyle` | The default loads a custom [matplotlibrc param file](https://matplotlib.org/stable/users/explain/customizing.html#the-matplotlibrc-file) that comes with TopoStats. Users can specify the path to their own style file as an alternative. | -| | `save_format` | string | `null` | Format to save images in, `null` defaults to `png` see [matplotlib.pyplot.savefig](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html) | -| | `savefig_dpi` | string / float | `null` | Dots Per Inch (DPI), if `null` then the value `figure` is used, for other values (typically integers) see [#further-customisation] and [Matplotlib](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html). Low DPI's improve processing time but can reduce the plotted trace (but not the actual trace) accuracy. | -| | `pixel_interpolation` | string | `null` | Interpolation method for image plots. Recommended default 'null' prevents banding that occurs in some images. If interpolation is needed, we recommend `gaussian`. See [matplotlib imshow interpolations documentation](https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html) for details. | -| | `image_set` | string | `all` | Which images to plot. Options : `all`, `core` | -| | `zrange` | list | `[0, 3]` | Low (first number) and high (second number) height range for core images (can take [null, null]). **NB** `low <= high` otherwise you will see a `ValueError: minvalue must be less than or equal to maxvalue` error. | -| | `colorbar` | boolean | `true` | Whether to include the colorbar scale in plots. Options `true`, `false` | -| | `axes` | boolean | `true` | Whether to include the axes in the produced plots. | -| | `num_ticks` | null / int | `null` | Number of ticks to have along the x and y axes. 
Options : `null` (auto) or an integer >1 | -| | `cmap` | string | `null` | Colormap/colourmap to use (defaults to 'nanoscope' if null (defined in `topostats/topostats.mplstyle`). Other options are 'afmhot', 'viridis' etc., see [Matplotlib : Choosing Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html). | -| | `mask_cmap` | string | `blu` | Color used when masking regions. Options `blu`, `jet_r` or any valid Matplotlib colour. | -| | `histogram_log_axis` | boolean | `false` | Whether to plot hisograms using a logarithmic scale or not. Options: `true`, `false`. | -| `summary_stats` | `run` | boolean | `true` | Whether to generate summary statistical plots of the distribution of different metrics grouped by the image that has been processed. | -| | `config` | str | `null` | Path to a summary config YAML file that configures/controls how plotting is done. If one is not specified either the command line argument `--summary_config` value will be used or if that option is not invoked the default `topostats/summary_config.yaml` will be used. | +| Section | Sub-Section | Data Type | Default | Description | +| :-------------- | :-------------------------------- | :------------- | :-------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `base_dir` | | string | `./` | Directory to recursively search for files within.[^1] | +| `output_dir` | | string | `./output` | Directory that output should be saved to.[^1] | +| `log_level` | | string | `info` | Verbosity of logging, options are (in increasing order) `warning`, `error`, `info`, `debug`. | +| `cores` | | integer | `2` | Number of cores to run parallel processes on. 
| +| `file_ext` | | string | `.spm` | File extensions to search for. | +| `loading` | `channel` | string | `Height` | The channel of data to be processed, what this is will depend on the file-format you are processing and the channel you wish to process. | +| `filter` | `run` | boolean | `true` | Whether to run the filtering stage, without this other stages won't run so leave as `true`. | +| | `threshold_method` | str | `std_dev` | Threshold method for filtering, options are `ostu`, `std_dev` or `absolute`. | +| | `otsu_threshold_multiplier` | float | `1.0` | Factor by which the derived Otsu Threshold should be scaled. | +| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | +| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first) and above (second) absolute threshold for separating data from the image background. | +| | `gaussian_size` | float | `0.5` | The number of standard deviations to build the Gaussian kernel and thus affects the degree of blurring. See [skimage.filters.gaussian](https://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.gaussian) and `sigma` for more information. | +| | `gaussian_mode` | string | `nearest` | | +| `grains` | `run` | boolean | `true` | Whether to run grain finding. Options `true`, `false` | +| | `row_alignment_quantile` | float | `0.5` | Quantile (0.0 to 1.0) to be used to determine the average background for the image. below values may improve flattening of large features. | +| | `smallest_grain_size_nm2` | int | `100` | The smallest size of grains to be included (in nm^2), anything smaller than this is considered noise and removed. **NB** must be `> 0.0`. 
| +| | `threshold_method` | float | `std_dev` | Threshold method for grain finding. Options : `otsu`, `std_dev`, `absolute` | +| | `otsu_threshold_multiplier` | | `1.0` | Factor by which the derived Otsu Threshold should be scaled. | +| | `threshold_std_dev` | dictionary | `10.0, 1.0` | A pair of values that scale the standard deviation, after scaling the standard deviation `below` is subtracted from the image mean to give the below/lower threshold and the `above` is added to the image mean to give the above/upper threshold. These values should _always_ be positive. | +| | `threshold_absolute` | dictionary | `-1.0, 1.0` | Below (first), above (second) absolute threshold for separating grains from the image background. | +| | `direction` | | `above` | Defines whether to look for grains above or below thresholds or both. Options: `above`, `below`, `both` | +| | `smallest_grain_size` | int | `50` | Catch-all value for the minimum size of grains. Measured in nanometres squared. All grains with area below than this value are removed. | +| | `absolute_area_threshold` | dictionary | `[300, 3000], [null, null]` | Area thresholds for above the image background (first) and below the image background (second), which grain sizes are permitted, measured in nanometres squared. All grains outside this area range are removed. | +| | `remove_edge_intersecting_grains` | boolean | `true` | Whether to remove grains that intersect the image border. _Do not change this unless you know what you are doing_. This will ruin any statistics relating to grain size, shape and DNA traces. | +| `grainstats` | `run` | boolean | `true` | Whether to calculate grain statistics. 
Options : `true`, `false` | +| | `cropped_size` | float | `40.0` | Force cropping of grains to this length (in nm) of square cropped images (can take `-1` for grain-sized box) | +| | `edge_detection_method` | str | `binary_erosion` | Type of edge detection method to use when determining the edges of grain masks before calculating statistics on them. Options : `binary_erosion`, `canny`. | +| `dnatracing` | `run` | boolean | `true` | Whether to run DNA Tracing. Options : true, false | +| | `min_skeleton_size` | int | `10` | The minimum number of pixels a skeleton should be for statistics to be calculated on it. Anything smaller than this is dropped but grain statistics are retained. | +| | `skeletonisation_method` | str | `topostats` | Skeletonisation method to use, possible options are `zhang`, `lee`, `thin` (from [Scikit-image Morphology module](https://scikit-image.org/docs/stable/api/skimage.morphology.html)) or the original bespoke TopoStas method `topostats`. | +| | `spline_step_size` | float | `7.0e-9` | The sampling rate of the spline in metres. This is the frequency at which points are sampled from fitted traces to act as guide points for the splining process using scipy's splprep. | +| | `spline_linear_smoothing` | float | `5.0` | The amount of smoothing to apply to splines of linear molecule traces. | +| | `spline_circular_smoothing` | float | `0.0` | The amount of smoothing to apply to splines of circular molecule traces. | +| | `pad_width` | int | 10 | Padding for individual grains when tracing. This is sometimes required if the bounding box around grains is too tight and they touch the edge of the image. | +| | `cores` | int | 1 | Number of cores to use for tracing. **NB** Currently this is NOT used and should be left commented in the YAML file. | +| `plotting` | `run` | boolean | `true` | Whether to run plotting. 
Options : `true`, `false` | +| | `style` | str | `topostats.mplstyle` | The default loads a custom [matplotlibrc param file](https://matplotlib.org/stable/users/explain/customizing.html#the-matplotlibrc-file) that comes with TopoStats. Users can specify the path to their own style file as an alternative. | +| | `save_format` | string | `null` | Format to save images in, `null` defaults to `png` see [matplotlib.pyplot.savefig](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html) | +| | `savefig_dpi` | string / float | `null` | Dots Per Inch (DPI), if `null` then the value `figure` is used, for other values (typically integers) see [#further-customisation] and [Matplotlib](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html). Low DPI's improve processing time but can reduce the plotted trace (but not the actual trace) accuracy. | +| | `pixel_interpolation` | string | `null` | Interpolation method for image plots. Recommended default 'null' prevents banding that occurs in some images. If interpolation is needed, we recommend `gaussian`. See [matplotlib imshow interpolations documentation](https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html) for details. | +| | `image_set` | string | `all` | Which images to plot. Options : `all`, `core` | +| | `zrange` | list | `[0, 3]` | Low (first number) and high (second number) height range for core images (can take [null, null]). **NB** `low <= high` otherwise you will see a `ValueError: minvalue must be less than or equal to maxvalue` error. | +| | `colorbar` | boolean | `true` | Whether to include the colorbar scale in plots. Options `true`, `false` | +| | `axes` | boolean | `true` | Whether to include the axes in the produced plots. | +| | `num_ticks` | null / int | `null` | Number of ticks to have along the x and y axes. 
Options : `null` (auto) or an integer >1 | +| | `cmap` | string | `null` | Colormap/colourmap to use (defaults to 'nanoscope' if null (defined in `topostats/topostats.mplstyle`). Other options are 'afmhot', 'viridis' etc., see [Matplotlib : Choosing Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html). | +| | `mask_cmap` | string | `blu` | Color used when masking regions. Options `blu`, `jet_r` or any valid Matplotlib colour. | +| | `histogram_log_axis` | boolean | `false` | Whether to plot hisograms using a logarithmic scale or not. Options: `true`, `false`. | +| `summary_stats` | `run` | boolean | `true` | Whether to generate summary statistical plots of the distribution of different metrics grouped by the image that has been processed. | +| | `config` | str | `null` | Path to a summary config YAML file that configures/controls how plotting is done. If one is not specified either the command line argument `--summary_config` value will be used or if that option is not invoked the default `topostats/summary_config.yaml` will be used. | ## Summary Configuration From 161b1da46e96fef7f2dd0eddc6469af01ec0657d Mon Sep 17 00:00:00 2001 From: Neil Shephard Date: Mon, 22 Jan 2024 12:03:17 +0000 Subject: [PATCH 7/8] PR Feedback Thanks for the feedback @MaxGamil-Sheffield this commit... + Loosens validation of `cmap` so that any Matplotlib color map can be used. + Removes reporting of DPI/output format/cmap from early logging stages and output of `completion_message()`. I hadn't thought about `None` being listed in the `completion_message()` for DPI/Output Format/cmap and appreciate this would be confusing so thanks for highlighting that. The solution I've gone for (removing the additions that reported these) is different from that suggested (update the `config` dictionary with parameters from `mpl.rcParams` early in processing). My reasoning being... + Previously we didn't report these, no one has ever asked to see them in the logging output. 
+ We write configuration options to a YAML file via `write_yaml()` at the end of processing. It's a verbatim copy of that which was used (either user specified or `default_config.yaml`) and it contains the settings used. If a user didn't specify DPI/cmap/format I'm not sure we should alter this. It could be argued it is useful to provide them but then that would also require writing _all_ other configuration/plotting options to be consistent should the default values ever change in the future. Currently only a handful of parameters are read from `topostats.mplstyle` and this is conditional on whether any of these are being over-ridden or not when instantiating the `Images()` class. Currently we ``` plottingfuncs.Images() > load_mplstyle() > Set DPI/cmap/format based on arguments to Images() ``` It is certainly possible to change this process as suggested and... ``` load_mplstyle() > Update DPI/cmap/format > plottingfuncs.Images() ``` ...but that is a larger amount of work to undertake and introduces scope drift to this PR. If it is desirable to report this information in logging and/or ensure the configuration file that is written contains the default parameters from `topostats.mplstyle` then we can address that as a separate issue.
--- topostats/processing.py | 5 +---- topostats/run_topostats.py | 4 +--- topostats/validation.py | 7 +++---- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/topostats/processing.py b/topostats/processing.py index 434a94626e4..f6d04013379 100644 --- a/topostats/processing.py +++ b/topostats/processing.py @@ -721,10 +721,7 @@ def completion_message(config: dict, img_files: list, summary_config: dict, imag f" Successfully Processed^1 : {images_processed} ({(images_processed * 100) / len(img_files)}%)\n" f" All statistics : {str(config['output_dir'])}/all_statistics.csv\n" f" Distribution Plots : {distribution_plots_message}\n\n" - f" Configuration : {config['output_dir']}/config.yaml\n" - f" DPI : {config['plotting']['savefig_dpi']}\n" - f" Output image format : {config['plotting']['savefig_format']}\n" - f" Colormap : {config['plotting']['cmap']}\n\n" + f" Configuration : {config['output_dir']}/config.yaml\n\n" f" Email : topostats@sheffield.ac.uk\n" f" Documentation : https://afm-spm.github.io/topostats/\n" f" Source Code : https://github.com/AFM-SPM/TopoStats/\n" diff --git a/topostats/run_topostats.py b/topostats/run_topostats.py index ecb5d6cbe10..fa0b6a6cac8 100644 --- a/topostats/run_topostats.py +++ b/topostats/run_topostats.py @@ -99,9 +99,6 @@ def run_topostats(args=None): # noqa: C901 sys.exit() LOGGER.info(f"Thresholding method (Filtering) : {config['filter']['threshold_method']}") LOGGER.info(f"Thresholding method (Grains) : {config['grains']['threshold_method']}") - LOGGER.info(f"DPI : {config['plotting']['savefig_dpi']}") - LOGGER.info(f"Output image format : {config['plotting']['savefig_format']}") - LOGGER.info(f"Colormap : {config['plotting']['cmap']}") LOGGER.debug(f"Configuration after update : \n{pformat(config, indent=4)}") # noqa : T203 processing_function = partial( @@ -223,4 +220,5 @@ def run_topostats(args=None): # noqa: C901 config["plotting"].pop("plot_dict") write_yaml(config, output_dir=config["output_dir"]) 
LOGGER.debug(f"Images processed : {images_processed}") + # Update config with plotting defaults for printing completion_message(config, img_files, summary_config, images_processed) diff --git a/topostats/validation.py b/topostats/validation.py index bf2a6e8bc95..8df8fdfdbd8 100644 --- a/topostats/validation.py +++ b/topostats/validation.py @@ -284,10 +284,9 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: ), "cmap": Or( None, - "afmhot", - "nanoscope", - "gwyddion", - error="Invalid value in config for 'plotting.cmap', valid values are 'afmhot', 'nanoscope' or 'gwyddion'", + str, + error="Invalid value in config for 'plotting.cmap', valid values are 'afmhot', 'nanoscope', " + "'gwyddion' or values supported by Matplotlib", ), "mask_cmap": str, "histogram_log_axis": Or( From 8bfa7007563d4e261fe37bf3a9559dc68d27d765 Mon Sep 17 00:00:00 2001 From: Neil Shephard Date: Wed, 24 Jan 2024 14:19:21 +0000 Subject: [PATCH 8/8] Addressing further PR feedback. + Correctly details in validation the values for `figure` in plotting. + Details what the `core` set outputs. I've not added the request to add links in validation output to Matplotlib cmap as links already exist in the documentation and I would expect if someone wishes to use a particular colormap here they would already be aware of what the options are. --- docs/configuration.md | 6 +++--- topostats/validation.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 4adf202858f..2089819ce7b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -96,7 +96,7 @@ Aside from the comments in YAML file itself the fields are described below. 
| | `save_format` | string | `null` | Format to save images in, `null` defaults to `png` see [matplotlib.pyplot.savefig](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html) | | | `savefig_dpi` | string / float | `null` | Dots Per Inch (DPI), if `null` then the value `figure` is used, for other values (typically integers) see [#further-customisation] and [Matplotlib](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html). Low DPI's improve processing time but can reduce the plotted trace (but not the actual trace) accuracy. | | | `pixel_interpolation` | string | `null` | Interpolation method for image plots. Recommended default 'null' prevents banding that occurs in some images. If interpolation is needed, we recommend `gaussian`. See [matplotlib imshow interpolations documentation](https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html) for details. | -| | `image_set` | string | `all` | Which images to plot. Options : `all`, `core` | +| | `image_set` | string | `all` | Which images to plot. Options : `all`, `core` (flattened image, grain mask overlay and trace overlay only). | | | `zrange` | list | `[0, 3]` | Low (first number) and high (second number) height range for core images (can take [null, null]). **NB** `low <= high` otherwise you will see a `ValueError: minvalue must be less than or equal to maxvalue` error. | | | `colorbar` | boolean | `true` | Whether to include the colorbar scale in plots. Options `true`, `false` | | | `axes` | boolean | `true` | Whether to include the axes in the produced plots. | @@ -169,8 +169,8 @@ through the basics. ### Further customisation Whilst the overall look of images is controlled in this manner there is one additional file that controls how -images are plotted in terms of filenames, titles and image types and whether an image is part of the `core` subset that -are always generated or not. 
+images are plotted in terms of filenames, titles and image types and whether an image is part of the `core` subset +(flattened image, grain mask overlay and trace overlay) that are always generated or not. This is the `topostats/plotting_dictionary.yaml` which for each image stage defines whether it is a component of the `core` subset of images that are always generated, sets the `filename`, the `title` on the plot, the `image_type` diff --git a/topostats/validation.py b/topostats/validation.py index 8df8fdfdbd8..2b7aecf8979 100644 --- a/topostats/validation.py +++ b/topostats/validation.py @@ -239,7 +239,7 @@ def validate_config(config: dict, schema: Schema, config_type: str) -> None: None, "figure", lambda n: n > 0, - error="Invalid value in config for plotting.savefig_dpi, valid" "values are 'figure' or integers", + error="Invalid value in config for plotting.savefig_dpi, valid" "values are 'figure' or floats", ), "image_set": Or( "all",