# Source code for aces.cleanlog.clean_parser

import logging
import re
from datetime import datetime

import pandas as pd

# Layout of one ASKAPSoft log line:
#   LEVEL process.name (rank, node) [timestamp] - message
LOGGER_REGEX = r"^(?P<level>\w+)\s+(?P<process>[\w\.]+)\s+\((?P<rank>\d+), (?P<node>\w+)\)\s+\[(?P<datetime>.+?)\] - (?P<message>.+)$"

logger = logging.getLogger(__name__)


def _group_names(pattern):
    """Return the named groups of a compiled regex in order of appearance."""
    return sorted(pattern.groupindex, key=pattern.groupindex.get)


def parse_clean_log(clean_log_file: str) -> pd.DataFrame:
    """Parse an ASKAPSoft imager log file and extract deconvolution data.

    Every ``deconvolution.monitor`` line that reports a minor cycle
    ("Iteration N, Peak residual ..., Objective function ..., Total flux ...")
    becomes one row. Messages from ``deconvolution.control``, and messages
    from ``deconvolution.multitermbasisfunction`` containing "Exceeded", are
    stored as ``major_stop_reason`` on the most recent minor-cycle row.

    Args:
        clean_log_file (str): filename of imager log file

    Returns:
        pd.DataFrame: deconvolution data at different minor iterations,
        indexed by ``datetime``. Contains the log line fields (``level``,
        ``process``, ``rank``, ``node``, ``message``), the parsed
        ``iteration``, ``peak_resid``, ``obj_func`` and ``total_flux``
        values, a ``major_cycle`` counter (``-1`` for rows that precede the
        first iteration 0) and, when at least one stop message followed a
        minor cycle, ``major_stop_reason``. If the log contains no
        minor-cycle reports an empty frame with the same index name and the
        always-present columns is returned.

    Raises:
        OSError: if the log file cannot be opened.
        ValueError: if a matched timestamp or numeric field cannot be parsed.
    """
    # regex for extracting deconvolution information
    logger_pattern = re.compile(LOGGER_REGEX)
    minor_cycle_message_pattern = re.compile(
        r"^Iteration (?P<iteration>\d+), "
        r"Peak residual (?P<peak_resid>.+?), Objective function (?P<obj_func>.+?), Total flux "
        r"(?P<total_flux>.+?)$"
    )

    # parse imager log file
    minor_cycles = []
    with open(clean_log_file) as f:
        for line in f:
            match = logger_pattern.match(line)
            if match is None:
                # not a log line in the expected layout
                continue

            log = match.groupdict()
            process = log["process"]
            message = log["message"]

            if process == "deconvolution.monitor":
                # extract the minor cycle info (iteration, peak_resid, obj_func, etc)
                minor_cycle_match = minor_cycle_message_pattern.match(message)
                if minor_cycle_match is None:
                    # monitor line that is not a minor-cycle report
                    continue
                log.update(minor_cycle_match.groupdict())
                minor_cycles.append(log)
            elif process == "deconvolution.control" or (
                process == "deconvolution.multitermbasisfunction"
                and "Exceeded" in message
            ):
                if minor_cycles:
                    minor_cycles[-1]["major_stop_reason"] = message
                else:
                    # A stop/control message before any minor cycle has
                    # nothing to attach to; skip it instead of crashing.
                    logger.debug(
                        "Ignoring %s message seen before any minor cycle: %s",
                        process,
                        message,
                    )

    if not minor_cycles:
        logger.warning("No minor cycle data found in %s", clean_log_file)
        columns = [
            *_group_names(logger_pattern),
            *_group_names(minor_cycle_message_pattern),
            "major_cycle",
        ]
        return pd.DataFrame(columns=columns).set_index("datetime")

    # format extracted data
    for d in minor_cycles:
        d["datetime"] = datetime.strptime(d["datetime"], "%Y-%m-%d %H:%M:%S,%f")
        d["peak_resid"] = float(d["peak_resid"])
        d["iteration"] = int(d["iteration"])
        d["obj_func"] = float(d["obj_func"])
        d["total_flux"] = float(d["total_flux"])

    minor_df = pd.DataFrame(data=minor_cycles).set_index("datetime")

    # determine major cycle numbers: each "iteration 0" row starts a new major
    # cycle, and every row at or after its timestamp is (re)labelled with it
    minor_df["major_cycle"] = -1
    is_first_iteration = (minor_df["iteration"] == 0).to_numpy()
    minor_iter0_timestamps = minor_df.index[is_first_iteration]
    for i, date in enumerate(minor_iter0_timestamps):
        minor_df.loc[minor_df.index >= date, "major_cycle"] = i

    return minor_df