Module rok4_tools.pyrolyse

Expand source code
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import argparse
import logging
import json
import os
import numpy
import time
import tempfile
import copy
from tqdm import tqdm

from rok4.pyramid import Pyramid, ROK4_IMAGE_HEADER_SIZE, SlabType
from rok4.storage import put_data_str, get_size, get_path_from_infos, get_data_binary

from rok4_tools import __version__

# Default logger: timestamped messages, INFO level and above
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

# Module-wide state: args is set by parse(), the three others by load()
args = None
pyramid = None
total = None
pbar = None

# Template of the statistics of one level, deep-copied by work() for every level met
stat_part = {"slab_count": 0, "slab_sizes": [], "link_count": 0}

# Statistics of the whole pyramid ("global") and of each level ("levels", indexed by level)
stats = {
    "global": {"slab_count": 0, "slab_sizes": [], "link_count": 0},
    "levels": {},
}


# Probabilities given to numpy.quantile to get the deciles, minimum and maximum included
quantiles = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

def parse() -> None:
    """Parse call arguments and check values

    Command line syntax errors make the program exit (argparse behaviour). When tiles' statistics are asked,
    the structures used to store tile sizes and read times are added to the statistics.

    Raises:
        Exception: option --ratio is lower than 1
        Exception: option --progress is used without --output
    """

    global args, stat_part, stats
    
    parser = argparse.ArgumentParser(
        prog = 'pyrolyse',
        description = "Tool to process statistics about pyramid's data : count and size of tiles and slabs, min and max, by level",
        epilog = ''
    )

    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s ' + __version__
    )

    parser.add_argument(
        '--pyramid',
        metavar='storage://path/to/pyr.json',
        action='store',
        type=str,
        dest='pyramid',
        help="Pyramid's descriptor, to analyse",
        required=True
    )

    parser.add_argument(
        '--output',
        metavar='storage://path/to/conf.json',
        action='store',
        dest='output_path',
        help='File/object to write results. Print in standard output if not provided',
        required=False
    )

    parser.add_argument(
        '--tiles',
        action='store_true',
        dest='tiles',
        help='Get size analysis for tiles',
        required=False
    )

    parser.add_argument(
        '--progress',
        action='store_true',
        dest='progress',
        help='Print a progress bar (only with --output option)',
        required=False
    )

    parser.add_argument(
        '--deciles',
        action='store_true',
        dest='deciles',
        help='Get deciles for sizes and read times rather than values',
        required=False
    )

    parser.add_argument(
        '--ratio',
        type=int,
        metavar="N",
        action='store',
        dest='ratio',
        default=100,
        help='Ratio of measured slabs and tiles (<ratio> choose one). All slabs are counted',
        required=False
    )

    args = parser.parse_args()

    # The ratio is used as a modulo divisor to sample slabs: a null value would fail in the middle of the analysis
    if args.ratio < 1:
        raise Exception("Ratio of measured slabs has to be a strictly positive integer (--ratio option)")

    # The progress bar is only allowed when results are written in a file/object
    if args.progress and args.output_path is None:
        raise Exception("Print a progress bar is not possible without output file for statistics (--output option)")

    if args.tiles:
        # Tile sizes are stored by level and globally, read times only globally
        stat_part["tile_sizes"] = []
        stats["global"]["tile_sizes"] = []
        stats["perfs"] = []


def load() -> None:
    """Build the Pyramid object from the descriptor given with --pyramid and load its list

    The value returned by the list loading is kept as total, and used as the progress bar's total
    when --progress is provided.

    Raises:
        FormatError: Provided path or the TMS is not a well formed JSON
        Exception: Level issue : no one in the pyramid or the used TMS, or level ID not defined in the TMS
        MissingAttributeError: Attribute is missing in the content
        StorageError: Storage read issue (pyramid descriptor or TMS)
        MissingEnvironmentError: Missing object storage informations or TMS root directory
    """

    global pyramid, total, pbar

    pyramid = Pyramid.from_descriptor(args.pyramid)
    total = pyramid.load_list()

    # Without the progress option, no bar is created and pbar stays as it is
    if not args.progress:
        return

    pbar = tqdm(total=total)

def work() -> None:
    """Browse the pyramid's list and memorize the wanted information

    Every DATA slab is counted, globally and for its level, as well as every link. Sizes are only measured
    for one slab out of args.ratio in each level. We don't consider the size of a slab link but the slab
    target one.

    If tiles' statistics are wanted, we keep only the first non null tile size of each measured slab, and
    the time spent to read the slab's tile sizes. A measured slab without any non null tile size provides
    no tile size sample.

    With the deciles option, each list containing at least two values is replaced by its deciles.
    """

    global stat_part, stats, pyramid, pbar

    # NOTE(review): slab dimensions of the bottom level are used for every level — confirm all levels share them
    slab_tiles_count = pyramid.bottom_level.slab_width * pyramid.bottom_level.slab_height
    # Tile sizes are read after the header and 4 more bytes per tile (presumably the tile offsets table)
    slab_sizes_offset = ROK4_IMAGE_HEADER_SIZE + 4 * slab_tiles_count
    slab_sizes_size = 4 * slab_tiles_count

    global_stats = stats["global"]

    for (slab_type, level, _column, _row), infos in pyramid.list_generator():
        if slab_type != SlabType.DATA:
            continue

        if level not in stats["levels"]:
            stats["levels"][level] = copy.deepcopy(stat_part)
        level_stats = stats["levels"][level]

        global_stats["slab_count"] += 1
        level_stats["slab_count"] += 1

        if infos["link"]:
            global_stats["link_count"] += 1
            level_stats["link_count"] += 1

        # Sampling: the first slab of each level is measured, then one out of <ratio>
        if (level_stats["slab_count"] - 1) % args.ratio == 0:
            slab_path = get_path_from_infos(pyramid.storage_type, infos["root"], infos["slab"])
            size = get_size(slab_path)
            global_stats["slab_sizes"].append(size)
            level_stats["slab_sizes"].append(size)

            if args.tiles:
                # Only the partial read of the slab is timed
                tic = time.perf_counter()
                binary_sizes = get_data_binary(slab_path, (slab_sizes_offset, slab_sizes_size))
                toc = time.perf_counter()
                stats["perfs"].append(toc - tic)

                tile_sizes = numpy.frombuffer(
                    binary_sizes,
                    dtype = numpy.dtype('uint32'),
                    count = slab_tiles_count
                ).tolist()

                # Only the first non null tile size is kept for the statistics.
                # A slab with only null sizes is ignored rather than failing on an empty list
                first_tile_size = next((tile_size for tile_size in tile_sizes if tile_size != 0), None)
                if first_tile_size is not None:
                    level_stats["tile_sizes"].append(first_tile_size)
                    global_stats["tile_sizes"].append(first_tile_size)

        if args.progress:
            pbar.update(1)

    if args.progress:
        pbar.close()

    # Quantiles calculation: lists with less than two values are kept as they are
    if args.deciles:
        measures = ("slab_sizes", "tile_sizes") if args.tiles else ("slab_sizes",)

        for part in (global_stats, *stats["levels"].values()):
            for measure in measures:
                if len(part[measure]) > 1:
                    part[measure] = numpy.quantile(part[measure], quantiles).tolist()

        if args.tiles and len(stats["perfs"]) > 1:
            stats["perfs"] = numpy.quantile(stats["perfs"], quantiles).tolist()


def write() -> None:
    """Serialize the statistics as JSON and send them to the wanted destination

    Without output path, the JSON is printed in the standard output. Otherwise it is written
    in the provided file/object.
    """
    destination = args.output_path
    serialized = json.dumps(stats)

    if destination is not None:
        put_data_str(serialized, destination)
        return

    print(serialized)


def main() -> None:
    """Run the tool's steps in order: parse, load, work then write

    An exception raised by a step is logged and the program exits with status 1.
    The program exits with status 0 when all steps succeed.
    """
    status = 0

    try:
        parse()
        load()
        work()
        write()

    except FileNotFoundError as missing:
        logging.error(f"{missing} does not exists")
        status = 1

    except Exception as error:
        logging.error(error)
        status = 1

    sys.exit(status)

# Run the tool only when this file is executed as a script, not when it is imported
if __name__ == "__main__": 
    main()

Functions

def load() ‑> None

Create Pyramid object from the descriptor's path

Raises

FormatError
Provided path or the TMS is not a well formed JSON
Exception
Level issue : no one in the pyramid or the used TMS, or level ID not defined in the TMS
MissingAttributeError
Attribute is missing in the content
StorageError
Storage read issue (pyramid descriptor or TMS)
MissingEnvironmentError
Missing object storage informations or TMS root directory
Expand source code
def load() -> None:
    """Build the Pyramid object from the descriptor given with --pyramid and load its list

    The value returned by the list loading is kept as total, and used as the progress bar's total
    when --progress is provided.

    Raises:
        FormatError: Provided path or the TMS is not a well formed JSON
        Exception: Level issue : no one in the pyramid or the used TMS, or level ID not defined in the TMS
        MissingAttributeError: Attribute is missing in the content
        StorageError: Storage read issue (pyramid descriptor or TMS)
        MissingEnvironmentError: Missing object storage informations or TMS root directory
    """

    global pyramid, total, pbar

    pyramid = Pyramid.from_descriptor(args.pyramid)
    total = pyramid.load_list()

    # Without the progress option, no bar is created and pbar stays as it is
    if not args.progress:
        return

    pbar = tqdm(total=total)
def main() ‑> None
Expand source code
def main() -> None:
    """Run the tool's steps in order: parse, load, work then write

    An exception raised by a step is logged and the program exits with status 1.
    The program exits with status 0 when all steps succeed.
    """
    status = 0

    try:
        parse()
        load()
        work()
        write()

    except FileNotFoundError as missing:
        logging.error(f"{missing} does not exists")
        status = 1

    except Exception as error:
        logging.error(error)
        status = 1

    sys.exit(status)
def parse() ‑> None

Parse call arguments and check values

Exit the program if an error occurred

Raises

Exception
option --progress is used without --output
Expand source code
def parse() -> None:
    """Parse call arguments and check values

    Command line syntax errors make the program exit (argparse behaviour). When tiles' statistics are asked,
    the structures used to store tile sizes and read times are added to the statistics.

    Raises:
        Exception: option --ratio is lower than 1
        Exception: option --progress is used without --output
    """

    global args, stat_part, stats
    
    parser = argparse.ArgumentParser(
        prog = 'pyrolyse',
        description = "Tool to process statistics about pyramid's data : count and size of tiles and slabs, min and max, by level",
        epilog = ''
    )

    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s ' + __version__
    )

    parser.add_argument(
        '--pyramid',
        metavar='storage://path/to/pyr.json',
        action='store',
        type=str,
        dest='pyramid',
        help="Pyramid's descriptor, to analyse",
        required=True
    )

    parser.add_argument(
        '--output',
        metavar='storage://path/to/conf.json',
        action='store',
        dest='output_path',
        help='File/object to write results. Print in standard output if not provided',
        required=False
    )

    parser.add_argument(
        '--tiles',
        action='store_true',
        dest='tiles',
        help='Get size analysis for tiles',
        required=False
    )

    parser.add_argument(
        '--progress',
        action='store_true',
        dest='progress',
        help='Print a progress bar (only with --output option)',
        required=False
    )

    parser.add_argument(
        '--deciles',
        action='store_true',
        dest='deciles',
        help='Get deciles for sizes and read times rather than values',
        required=False
    )

    parser.add_argument(
        '--ratio',
        type=int,
        metavar="N",
        action='store',
        dest='ratio',
        default=100,
        help='Ratio of measured slabs and tiles (<ratio> choose one). All slabs are counted',
        required=False
    )

    args = parser.parse_args()

    # The ratio is used as a modulo divisor to sample slabs: a null value would fail in the middle of the analysis
    if args.ratio < 1:
        raise Exception("Ratio of measured slabs has to be a strictly positive integer (--ratio option)")

    # The progress bar is only allowed when results are written in a file/object
    if args.progress and args.output_path is None:
        raise Exception("Print a progress bar is not possible without output file for statistics (--output option)")

    if args.tiles:
        # Tile sizes are stored by level and globally, read times only globally
        stat_part["tile_sizes"] = []
        stats["global"]["tile_sizes"] = []
        stats["perfs"] = []
def work() ‑> None

Browse pyramid's list and memorize wanted informations

If tiles' statistics are wanted, we keep only one non-null tile size per slab. For a slab link, we don't consider the size of the link but that of the target slab.

Expand source code
def work() -> None:
    """Browse the pyramid's list and memorize the wanted information

    Every DATA slab is counted, globally and for its level, as well as every link. Sizes are only measured
    for one slab out of args.ratio in each level. We don't consider the size of a slab link but the slab
    target one.

    If tiles' statistics are wanted, we keep only the first non null tile size of each measured slab, and
    the time spent to read the slab's tile sizes. A measured slab without any non null tile size provides
    no tile size sample.

    With the deciles option, each list containing at least two values is replaced by its deciles.
    """

    global stat_part, stats, pyramid, pbar

    # NOTE(review): slab dimensions of the bottom level are used for every level — confirm all levels share them
    slab_tiles_count = pyramid.bottom_level.slab_width * pyramid.bottom_level.slab_height
    # Tile sizes are read after the header and 4 more bytes per tile (presumably the tile offsets table)
    slab_sizes_offset = ROK4_IMAGE_HEADER_SIZE + 4 * slab_tiles_count
    slab_sizes_size = 4 * slab_tiles_count

    global_stats = stats["global"]

    for (slab_type, level, _column, _row), infos in pyramid.list_generator():
        if slab_type != SlabType.DATA:
            continue

        if level not in stats["levels"]:
            stats["levels"][level] = copy.deepcopy(stat_part)
        level_stats = stats["levels"][level]

        global_stats["slab_count"] += 1
        level_stats["slab_count"] += 1

        if infos["link"]:
            global_stats["link_count"] += 1
            level_stats["link_count"] += 1

        # Sampling: the first slab of each level is measured, then one out of <ratio>
        if (level_stats["slab_count"] - 1) % args.ratio == 0:
            slab_path = get_path_from_infos(pyramid.storage_type, infos["root"], infos["slab"])
            size = get_size(slab_path)
            global_stats["slab_sizes"].append(size)
            level_stats["slab_sizes"].append(size)

            if args.tiles:
                # Only the partial read of the slab is timed
                tic = time.perf_counter()
                binary_sizes = get_data_binary(slab_path, (slab_sizes_offset, slab_sizes_size))
                toc = time.perf_counter()
                stats["perfs"].append(toc - tic)

                tile_sizes = numpy.frombuffer(
                    binary_sizes,
                    dtype = numpy.dtype('uint32'),
                    count = slab_tiles_count
                ).tolist()

                # Only the first non null tile size is kept for the statistics.
                # A slab with only null sizes is ignored rather than failing on an empty list
                first_tile_size = next((tile_size for tile_size in tile_sizes if tile_size != 0), None)
                if first_tile_size is not None:
                    level_stats["tile_sizes"].append(first_tile_size)
                    global_stats["tile_sizes"].append(first_tile_size)

        if args.progress:
            pbar.update(1)

    if args.progress:
        pbar.close()

    # Quantiles calculation: lists with less than two values are kept as they are
    if args.deciles:
        measures = ("slab_sizes", "tile_sizes") if args.tiles else ("slab_sizes",)

        for part in (global_stats, *stats["levels"].values()):
            for measure in measures:
                if len(part[measure]) > 1:
                    part[measure] = numpy.quantile(part[measure], quantiles).tolist()

        if args.tiles and len(stats["perfs"]) > 1:
            stats["perfs"] = numpy.quantile(stats["perfs"], quantiles).tolist()
def write() ‑> None

Write the information as JSON, to the standard output or to a file

Expand source code
def write() -> None:
    """Serialize the statistics as JSON and send them to the wanted destination

    Without output path, the JSON is printed in the standard output. Otherwise it is written
    in the provided file/object.
    """
    destination = args.output_path
    serialized = json.dumps(stats)

    if destination is not None:
        put_data_str(serialized, destination)
        return

    print(serialized)