From 4638c6a8d3b64990cc5d8da83862f9f49365a1a4 Mon Sep 17 00:00:00 2001 From: IlyaMuravjov Date: Mon, 4 Mar 2024 00:58:06 +0300 Subject: [PATCH] Refactor evaluation scripts and solve Gigascale SIGTTIN issue --- Dockerfile-all-tools | 32 +-- cli/all_pairs_cflr_command_manager.py | 195 ------------------ cli/eval_all_pairs_cflr.py | 76 +++---- cli/runners/all_pairs_cflr_tool_runner.py | 81 ++++++++ .../all_pairs_cflr_tool_runner_facade.py | 25 +++ ...gascale_algo_all_pairs_cflr_tool_runner.py | 70 +++++++ ...graspan_algo_all_pairs_cflr_tool_runner.py | 65 ++++++ .../pearl_algo_all_pairs_cflr_tool_runner.py | 38 ++++ .../pocr_algo_all_pairs_cflr_tool_runner.py | 25 +++ .../py_algo_all_pairs_cflr_tool_runner.py | 21 ++ 10 files changed, 368 insertions(+), 260 deletions(-) delete mode 100644 cli/all_pairs_cflr_command_manager.py create mode 100644 cli/runners/all_pairs_cflr_tool_runner.py create mode 100644 cli/runners/all_pairs_cflr_tool_runner_facade.py create mode 100644 cli/runners/gigascale_algo_all_pairs_cflr_tool_runner.py create mode 100644 cli/runners/graspan_algo_all_pairs_cflr_tool_runner.py create mode 100644 cli/runners/pearl_algo_all_pairs_cflr_tool_runner.py create mode 100644 cli/runners/pocr_algo_all_pairs_cflr_tool_runner.py create mode 100644 cli/runners/py_algo_all_pairs_cflr_tool_runner.py diff --git a/Dockerfile-all-tools b/Dockerfile-all-tools index 7f09506..22d2270 100644 --- a/Dockerfile-all-tools +++ b/Dockerfile-all-tools @@ -16,21 +16,6 @@ RUN apt-get update && apt-get install -y \ wget \ npm -# Gigascale dependencies -RUN apt-get update && apt-get install -y openjdk-17-jdk ant python2 - -# Graspan dependencies -RUN apt-get update && apt-get install -y \ - build-essential \ - wget \ - make \ - g++ \ - libboost-all-dev \ - git -RUN wget -qO- https://sourceforge.net/projects/boost/files/boost/1.62.0/boost_1_62_0.tar.gz | tar xvz -C /usr/local -ENV BOOST_ROOT=/usr/local/boost_1_62_0 -ENV LD_LIBRARY_PATH=$BOOST_ROOT/stage/lib:$LD_LIBRARY_PATH - ## 
POCR dependency, that needs to be built from sources RUN git clone https://github.com/kisslune/SVF.git /SVF \ && cd /SVF \ @@ -45,6 +30,9 @@ RUN git clone https://github.com/kisslune/POCR.git /POCR \ && echo 'pushd /POCR > /dev/null && source setup.sh && popd > /dev/null' >> /root/.bashrc ENV POCR_DIR=/POCR +# Gigascale dependencies +RUN apt-get update && apt-get install -y openjdk-17-jdk ant python2 expect + # Gigascale (specialized CFL-r tools) RUN git clone https://bitbucket.org/jensdietrich/gigascale-pointsto-oopsla2015.git /gigascale RUN sed -i 's/python /python2 /g' /gigascale/run.sh @@ -52,6 +40,18 @@ RUN sed -i 's/-d64 //g' /gigascale/run.sh ENV JAVA_TOOL_OPTIONS="-Dfile.encoding=UTF8" ENV GIGASCALE_DIR=/gigascale +# Graspan dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + wget \ + make \ + g++ \ + libboost-all-dev \ + git +RUN wget -qO- https://sourceforge.net/projects/boost/files/boost/1.62.0/boost_1_62_0.tar.gz | tar xvz -C /usr/local +ENV BOOST_ROOT=/usr/local/boost_1_62_0 +ENV LD_LIBRARY_PATH=$BOOST_ROOT/stage/lib:$LD_LIBRARY_PATH + # Graspan (general-purpose CFL-r tool) RUN git clone https://github.com/Graspan/Graspan-C.git /graspan RUN sed -i 's|/home/aftab/Downloads/boost_1_62_installed|'$BOOST_ROOT'|g' /graspan/src/makefile @@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 RUN python3 -m pip install --upgrade pip -# CFPQ_PyAlgo (this tool) +# CFPQ_PyAlgo (this general-purpose CFL-r tool) COPY requirements.txt /py_algo/requirements.txt COPY deps/CFPQ_Data /py_algo/deps/CFPQ_Data RUN pip3 install pygraphblas==5.1.8.0 diff --git a/cli/all_pairs_cflr_command_manager.py b/cli/all_pairs_cflr_command_manager.py deleted file mode 100644 index a3f4575..0000000 --- a/cli/all_pairs_cflr_command_manager.py +++ /dev/null @@ -1,195 +0,0 @@ -import os -import re -from abc import ABC, abstractmethod -from pathlib import Path -from typing import 
Optional, Tuple - -import psutil - -from src.grammar.cnf_grammar_template import CnfGrammarTemplate -from src.graph.label_decomposed_graph import LabelDecomposedGraph -from src.problems.Base.template_cfg.utils import explode_indices - - -def get_all_pairs_cflr_command_manager( - algo_settings: str, - graph_path: Path, - grammar_path: Path -) -> "AllPairsCflrCommandManager": - return { - "pocr": PocrAllPairsCflrCommandManager, - "pearl": PearlAllPairsCflrCommandManager, - "gigascale": GigascaleAllPairsCflrCommandManager, - "graspan": GraspanAllPairsCflrCommandManager - }.get(algo_settings, PyAlgoAllPairsCflrCommandManager)( - algo_settings, graph_path, grammar_path - ) - - -class AllPairsCflrCommandManager(ABC): - def __init__( - self, - algo_settings: str, - graph_path: Path, - grammar_path: Path - ): - self.algo_settings = algo_settings - self.graph_path = graph_path - self.grammar_path = grammar_path - - @abstractmethod - def create_command(self) -> str: - pass - - # noinspection PyMethodMayBeStatic - def discard_stderr(self) -> bool: - return False - - @property - def work_dir(self) -> Optional[Path]: - return None - - # noinspection PyMethodMayBeStatic - def get_analysis_time(self, output: str) -> float: - return float(re.search(r"AnalysisTime\s+([\d.]+|NaN)", output).group(1)) - - # noinspection PyMethodMayBeStatic - def get_edge_count(self, output: str) -> int: - return re.search(r"#(SEdges|CountEdges)\s+([\d.]+|NaN)", output).group(2) - - -class PyAlgoAllPairsCflrCommandManager(AllPairsCflrCommandManager): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def create_command(self) -> Optional[str]: - return f"python3 -m cli.run_all_pairs_cflr {self.algo_settings} {self.graph_path} {self.grammar_path}" - - -class PocrAllPairsCflrCommandManager(AllPairsCflrCommandManager): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def create_command(self) -> Optional[str]: - return ( - f'{self.grammar_path.stem} 
-pocr {self.graph_path}' - if self.grammar_path.stem in {"aa", "vf"} - else f'cfl -pocr {self.grammar_path} {self.graph_path}' - ) - - -class PearlAllPairsCflrCommandManager(AllPairsCflrCommandManager): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def create_command(self) -> Optional[str]: - return ( - f'./{self.grammar_path.stem} {self.graph_path} -pearl -scc=false -gf=false' - if self.grammar_path.stem in {"aa", "vf"} - else None - ) - - @property - def work_dir(self) -> Optional[Path]: - return Path(os.environ['PEARL_DIR']) - - def get_edge_count(self, output: str) -> int: - vedges_search = re.search(r"#VEdges\s+(\d+)", output) - if vedges_search: - return vedges_search.group(1) - return re.search(r"#AEdges\s+(\d+)", output).group(1) - - -class GigascaleAllPairsCflrCommandManager(AllPairsCflrCommandManager): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def create_command(self) -> Optional[str]: - return ( - f'./run.sh -wdlrb -i datasets/dacapo9/{self.graph_path.stem}' - if self.grammar_path.stem in {"java_points_to"} - else None - ) - - @property - def work_dir(self) -> Optional[Path]: - return Path(os.environ['GIGASCALE_DIR']) - - # Gigascale sends [INFO] logs to stderr - def discard_stderr(self) -> bool: - return True - - def get_analysis_time(self, output: str) -> float: - return self._get_analysis_time_and_edge_count(output)[0] - - def get_edge_count(self, output: str) -> int: - return self._get_analysis_time_and_edge_count(output)[1] - - @staticmethod - def _get_analysis_time_and_edge_count(output: str) -> Tuple[float, int]: - pattern = (r"benchmark\s+TC-time\s+TC-mem\s+v\s+e\s+vpt\s+avg\s+max\s+load/f\s+store/f\s*\n" - r"\w+\s+" - r"(\d+\.\d+)\s+" - r"\d+(?:\.\d+)?\s+" - r"\d+\s+" - r"\d+\s+" - r"(\d+)\s+" - r"\d+(?:\.\d+)?\s+" - r"\d+\s+" - r"\d+\s+" - r"\d+") - - match = re.search(pattern, output) - - tc_time, vpt = match.groups() - return float(tc_time), int(vpt) - - -class 
GraspanAllPairsCflrCommandManager(AllPairsCflrCommandManager): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def create_command(self) -> Optional[str]: - grammar = CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path) - graph = LabelDecomposedGraph.read_from_pocr_graph_file(self.graph_path) - - # Graspan doesn't support indexed symbols, we need to concat labels and indices - if graph.block_matrix_space.block_count > 1: - graph, grammar = explode_indices(graph, grammar) - graph_path = self.graph_path.parent / "graspan" / self.graph_path.name - os.makedirs(graph_path.parent, exist_ok=True) - graph.write_to_pocr_graph_file(graph_path) - else: - graph_path = self.graph_path - - # Graspan doesn't support grammars with over 255 symbols, because - # each symbol is encoded with one byte and one symbol is reserved for epsilon - if len(grammar.symbols) > 255: - return None - - grammar_path = self.grammar_path.parent / "graspan" / self.grammar_path.name - os.makedirs(grammar_path.parent, exist_ok=True) - grammar.write_to_pocr_cnf_file(grammar_path, include_starting=False) - - return ( - f'./run {graph_path} {grammar_path} 1 ' - f'{int(psutil.virtual_memory().total / 10**9 * 0.9)} ' - f'{os.cpu_count() * 2}' - ) - - @property - def work_dir(self) -> Optional[Path]: - return Path(os.environ['GRASPAN_DIR']) / "src" - - def get_analysis_time(self, output: str) -> float: - return float(re.search(r"COMP TIME:\s*([\d.]+|NaN)", output).group(1)) - - def get_edge_count(self, output: str) -> int: - final_file = re.search(r"finalFile:\s*(.*)", output).group(1) - start_nonterm = CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path).start_nonterm - with open(final_file, "r") as file: - edges = set() - for line in file: - if line.split()[-1] == start_nonterm.label: - edges.add((line.split()[0], line.split()[1])) - return len(edges) diff --git a/cli/eval_all_pairs_cflr.py b/cli/eval_all_pairs_cflr.py index ec93f3f..ceef7e3 100644 --- 
a/cli/eval_all_pairs_cflr.py +++ b/cli/eval_all_pairs_cflr.py @@ -1,20 +1,19 @@ import argparse import csv import os -import shlex -import signal import subprocess import sys from pathlib import Path from typing import Optional, List -from cli.all_pairs_cflr_command_manager import get_all_pairs_cflr_command_manager +from cli.runners.all_pairs_cflr_tool_runner import IncompatibleCflrToolError +from cli.runners.all_pairs_cflr_tool_runner_facade import run_appropriate_all_pairs_cflr_tool # see `man timeout` TIMEOUT_EXIT_CODE = 124 -def check_file_for_completion(result_file_path, rounds): +def is_enough_data_collected(result_file_path, rounds): try: with open(result_file_path, 'r') as file: reader = list(csv.reader(file)) @@ -31,7 +30,7 @@ def run_experiment( graph_path: Path, grammar_path: Path, rounds: int, - timeout: Optional[int], + timeout_sec: Optional[int], result_file_path: Path ): graph_base_name = graph_path.stem @@ -40,7 +39,7 @@ def run_experiment( if not os.path.exists(result_file_path): with open(result_file_path, 'w', newline='') as csvfile: writer = csv.writer(csvfile) - writer.writerow(["algo", "graph", "grammar", "edge_count", "ram_kb", "time_sec"]) + writer.writerow(["algo", "graph", "grammar", "s_edges", "ram_kb", "time_sec"]) if "--rewrite-grammar" in algo_settings: algo_settings = algo_settings.replace("--rewrite-grammar", "") @@ -49,61 +48,40 @@ def run_experiment( grammar_path = rewritten_grammar_path for _ in range(rounds): - if check_file_for_completion(result_file_path, rounds): + if is_enough_data_collected(result_file_path, rounds): return - command_manager = get_all_pairs_cflr_command_manager(algo_settings, graph_path, grammar_path) - - temp_ram_file = Path("temp_ram_usage.txt").absolute() - - base_command = command_manager.create_command() - - if base_command is None: - edge_count, ram_kb, time_sec = "-", "-", "-" - else: - command = (f"/usr/bin/time -o {temp_ram_file} -f %M " - + ("" if timeout is None else f"timeout {timeout}s ") - + 
base_command) - - process = subprocess.Popen( - shlex.split(command), - cwd=command_manager.work_dir, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL if command_manager.discard_stderr() else None + try: + result = run_appropriate_all_pairs_cflr_tool( + algo_settings=algo_settings, + graph_path=graph_path, + grammar_path=grammar_path, + timeout_sec=timeout_sec ) - try: - output, _ = process.communicate() - except KeyboardInterrupt: - process.send_signal(signal.SIGINT) - try: - process.wait(timeout=5) - except subprocess.TimeoutExpired: - process.kill() - raise - if process.returncode == 0: - output = output.decode() - time_sec = command_manager.get_analysis_time(output) - edge_count = command_manager.get_edge_count(output) - with open(temp_ram_file, 'r') as f: - ram_kb = f.read().strip() - elif process.returncode == TIMEOUT_EXIT_CODE: + s_edges = result.s_edges + ram_kb = result.ram_kb + time_sec = result.time_sec + except IncompatibleCflrToolError: + s_edges, ram_kb, time_sec = "-", "-", "-" + except subprocess.CalledProcessError as e: + if e.returncode == TIMEOUT_EXIT_CODE: print(" Runner process timed out") - edge_count, ram_kb, time_sec = "OOT", "OOT", "OOT" + s_edges, ram_kb, time_sec = "OOT", "OOT", "OOT" else: print( - f" Runner process terminated with return code {process.returncode}\n" + f" Runner process terminated with return code {e.returncode}\n" f" (interpreting as out of memory error)" ) - edge_count, ram_kb, time_sec = "OOM", "OOM", "OOM" + s_edges, ram_kb, time_sec = "OOM", "OOM", "OOM" with open(result_file_path, 'a', newline='') as csvfile: - print(f" {edge_count} {ram_kb} {time_sec}") + print(f" {s_edges} {ram_kb} {time_sec}") writer = csv.writer(csvfile) writer.writerow([ {algo_name}, os.path.basename(graph_base_name), os.path.basename(grammar_base_name), - edge_count, + s_edges, ram_kb, time_sec ]) @@ -114,7 +92,7 @@ def eval_all_pairs_cflr( data_config: Path, result_path: Path, rounds: Optional[int], - timeout: Optional[int], + 
timeout_sec: Optional[int], ): with open(algo_config, mode='r') as algo_file: algo_reader = csv.DictReader(algo_file) @@ -141,7 +119,7 @@ def eval_all_pairs_cflr( graph_path=graph_path, grammar_path=grammar_path, rounds=rounds, - timeout=timeout, + timeout_sec=timeout_sec, result_file_path=result_file_path ) @@ -168,7 +146,7 @@ def main(raw_args: List[str]): data_config=Path(args.data_config), result_path=Path(args.result_path), rounds=args.rounds, - timeout=args.timeout + timeout_sec=args.timeout ) diff --git a/cli/runners/all_pairs_cflr_tool_runner.py b/cli/runners/all_pairs_cflr_tool_runner.py new file mode 100644 index 0000000..1d96d7b --- /dev/null +++ b/cli/runners/all_pairs_cflr_tool_runner.py @@ -0,0 +1,81 @@ +import re +import shlex +import subprocess +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + + +@dataclass +class CflrToolRunResult: + s_edges: int + time_sec: float + ram_kb: float + + +class IncompatibleCflrToolError(Exception): + pass + + +class AllPairsCflrToolRunner(ABC): + @abstractmethod + def run(self) -> CflrToolRunResult: + """ + Raises IncompatibleCflrToolError if CFL-r tool can't process this kind of input. + For example, some tools are only compatible with one specific grammar. + + Raises CalledProcessError if CFL-r exits with non-zero exit code. 
+ """ + pass + + +class AbstractAllPairsCflrToolRunner(AllPairsCflrToolRunner, ABC): + def __init__( + self, + algo_settings: str, + graph_path: Path, + grammar_path: Path, + timout_sec: Optional[int] + ): + self.algo_settings = algo_settings + self.graph_path = graph_path + self.grammar_path = grammar_path + self.timeout_sec = timout_sec + + @property + @abstractmethod + def base_command(self) -> Optional[str]: + pass + + @property + def command(self) -> Optional[str]: + if self.base_command is None: + return None + else: + return (f'/usr/bin/time -f "Ram usage in KB: %M;\n" -o /dev/stdout ' + + ('' if self.timeout_sec is None else f'timeout {self.timeout_sec}s ') + + self.base_command) + + def run(self) -> CflrToolRunResult: + if self.command is None: + raise IncompatibleCflrToolError() + process = subprocess.run( + shlex.split(self.command), + cwd=self.work_dir, + stdout=subprocess.PIPE, + text=True, + ) + return self.parse_results(process) + + @abstractmethod + def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult: + pass + + @staticmethod + def parse_ram_usage_kb(process: subprocess.CompletedProcess[str]) -> float: + return float(re.search(r"Ram usage in KB: ([\d.]+);\n", process.stdout).group(1)) + + @property + def work_dir(self) -> Optional[Path]: + return None diff --git a/cli/runners/all_pairs_cflr_tool_runner_facade.py b/cli/runners/all_pairs_cflr_tool_runner_facade.py new file mode 100644 index 0000000..5284bec --- /dev/null +++ b/cli/runners/all_pairs_cflr_tool_runner_facade.py @@ -0,0 +1,25 @@ +from pathlib import Path +from typing import Optional + +from cli.runners.all_pairs_cflr_tool_runner import CflrToolRunResult +from cli.runners.gigascale_algo_all_pairs_cflr_tool_runner import GigascaleAllPairsCflrToolRunner +from cli.runners.graspan_algo_all_pairs_cflr_tool_runner import GraspanAllPairsCflrToolRunner +from cli.runners.pearl_algo_all_pairs_cflr_tool_runner import PearlAllPairsCflrToolRunner +from 
cli.runners.pocr_algo_all_pairs_cflr_tool_runner import PocrAllPairsCflrToolRunner +from cli.runners.py_algo_all_pairs_cflr_tool_runner import PyAlgoAllPairsCflrToolRunner + + +def run_appropriate_all_pairs_cflr_tool( + algo_settings: str, + graph_path: Path, + grammar_path: Path, + timeout_sec: Optional[int] +) -> CflrToolRunResult: + return { + "pocr": PocrAllPairsCflrToolRunner, + "pearl": PearlAllPairsCflrToolRunner, + "gigascale": GigascaleAllPairsCflrToolRunner, + "graspan": GraspanAllPairsCflrToolRunner + }.get(algo_settings, PyAlgoAllPairsCflrToolRunner)( + algo_settings, graph_path, grammar_path, timeout_sec + ).run() diff --git a/cli/runners/gigascale_algo_all_pairs_cflr_tool_runner.py b/cli/runners/gigascale_algo_all_pairs_cflr_tool_runner.py new file mode 100644 index 0000000..0589b27 --- /dev/null +++ b/cli/runners/gigascale_algo_all_pairs_cflr_tool_runner.py @@ -0,0 +1,70 @@ +import os +import re +import subprocess +from pathlib import Path +from typing import Optional + +from cli.runners.all_pairs_cflr_tool_runner import AbstractAllPairsCflrToolRunner, CflrToolRunResult, \ + IncompatibleCflrToolError + + +class GigascaleAllPairsCflrToolRunner(AbstractAllPairsCflrToolRunner): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def base_command(self) -> Optional[str]: + return ( + f'./run.sh -wdlrb -i datasets/dacapo9/{self.graph_path.stem}' + if self.grammar_path.stem in {"java_points_to", "java_points_to_rewritten"} + else None + ) + + @property + def work_dir(self) -> Optional[Path]: + return Path(os.environ['GIGASCALE_DIR']) + + def run(self) -> CflrToolRunResult: + if self.command is None: + raise IncompatibleCflrToolError() + # Gigascale run script uses `bash -i -c`, which can't be used repeatedly + # without emulating interactive environment with tools like `expect`. 
+ # Read more about `bash -ic` pitfalls: + # https://stackoverflow.com/questions/39920915/unexpected-sigttin-after-bash-ic-bin-echo-hello-when-bash-scripting + process = subprocess.run( + ["expect"], + cwd=self.work_dir, + stdout=subprocess.PIPE, + text=True, + input= + f""" + set timeout -1 + spawn {self.command} + expect eof + """ + ) + return self.parse_results(process) + + def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult: + # parses a table like this: + # benchmark TC-time TC-mem v e vpt avg max load/f store/f + # tradebeans 3.5 1055 439693 466969 696316 1.584 581 517 144 + pattern = (r"benchmark\s+TC-time\s+TC-mem\s+v\s+e\s+vpt\s+avg\s+max\s+load/f\s+store/f\s*\n" + r"\w+\s+" + r"(\d+\.\d+)\s+" + r"\d+(?:\.\d+)?\s+" + r"\d+\s+" + r"\d+\s+" + r"(\d+)\s+" + r"\d+(?:\.\d+)?\s+" + r"\d+\s+" + r"\d+\s+" + r"\d+") + + tc_time, vpt = re.search(pattern, process.stdout).groups() + + return CflrToolRunResult( + s_edges=int(vpt), + time_sec=float(tc_time), + ram_kb=self.parse_ram_usage_kb(process) + ) diff --git a/cli/runners/graspan_algo_all_pairs_cflr_tool_runner.py b/cli/runners/graspan_algo_all_pairs_cflr_tool_runner.py new file mode 100644 index 0000000..a09ce48 --- /dev/null +++ b/cli/runners/graspan_algo_all_pairs_cflr_tool_runner.py @@ -0,0 +1,65 @@ +import os +import re +import subprocess +from pathlib import Path +from typing import Optional + +import psutil + +from cli.runners.all_pairs_cflr_tool_runner import AbstractAllPairsCflrToolRunner, CflrToolRunResult +from src.grammar.cnf_grammar_template import CnfGrammarTemplate +from src.graph.label_decomposed_graph import LabelDecomposedGraph +from src.problems.Base.template_cfg.utils import explode_indices + + +class GraspanAllPairsCflrToolRunner(AbstractAllPairsCflrToolRunner): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def base_command(self) -> Optional[str]: + grammar = 
CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path) + graph = LabelDecomposedGraph.read_from_pocr_graph_file(self.graph_path) + + # Graspan doesn't support indexed symbols, we need to concat labels and indices + if graph.block_matrix_space.block_count > 1: + graph, grammar = explode_indices(graph, grammar) + graph_path = self.graph_path.parent / "graspan" / self.graph_path.name + os.makedirs(graph_path.parent, exist_ok=True) + graph.write_to_pocr_graph_file(graph_path) + else: + graph_path = self.graph_path + + # Graspan doesn't support grammars with over 255 symbols, because + # each symbol is encoded with one byte and one symbol is reserved for epsilon + if len(grammar.symbols) > 255: + return None + + grammar_path = self.grammar_path.parent / "graspan" / self.grammar_path.name + os.makedirs(grammar_path.parent, exist_ok=True) + grammar.write_to_pocr_cnf_file(grammar_path, include_starting=False) + + return ( + f'./run {graph_path} {grammar_path} 1 ' + f'{int(psutil.virtual_memory().total / 10**9 * 0.9)} ' + f'{os.cpu_count() * 2}' + ) + + @property + def work_dir(self) -> Optional[Path]: + return Path(os.environ['GRASPAN_DIR']) / "src" + + def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult: + final_file = re.search(r"finalFile:\s*(.*)", process.stdout).group(1) + start_nonterm = CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path).start_nonterm + with open(final_file, "r") as file: + s_edges = set() + for line in file: + if line.split()[-1] == start_nonterm.label: + s_edges.add((line.split()[0], line.split()[1])) + + return CflrToolRunResult( + s_edges=len(s_edges), + time_sec=float(re.search(r"COMP TIME:\s*([\d.]+|NaN)", process.stdout).group(1)), + ram_kb=self.parse_ram_usage_kb(process) + ) diff --git a/cli/runners/pearl_algo_all_pairs_cflr_tool_runner.py b/cli/runners/pearl_algo_all_pairs_cflr_tool_runner.py new file mode 100644 index 0000000..4122f04 --- /dev/null +++ 
b/cli/runners/pearl_algo_all_pairs_cflr_tool_runner.py @@ -0,0 +1,38 @@ +import os +import re +import subprocess +from pathlib import Path +from typing import Optional + +from cli.runners.all_pairs_cflr_tool_runner import AbstractAllPairsCflrToolRunner, CflrToolRunResult + + +class PearlAllPairsCflrToolRunner(AbstractAllPairsCflrToolRunner): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def base_command(self) -> Optional[str]: + return ( + f'./{self.grammar_path.stem} {self.graph_path} -pearl -scc=false -gf=false' + if self.grammar_path.stem in {"aa", "vf"} + else None + ) + + @property + def work_dir(self) -> Optional[Path]: + return Path(os.environ['PEARL_DIR']) + + def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult: + return CflrToolRunResult( + s_edges=self.parse_s_edges(process), + time_sec=float(re.search(r"AnalysisTime\s+([\d.]+)", process.stdout).group(1)), + ram_kb=self.parse_ram_usage_kb(process) + ) + + @staticmethod + def parse_s_edges(process: subprocess.CompletedProcess[str]) -> int: + vedges_search = re.search(r"#VEdges\s+(\d+)", process.stdout) + if vedges_search: + return int(vedges_search.group(1)) + return int(re.search(r"#AEdges\s+(\d+)", process.stdout).group(1)) diff --git a/cli/runners/pocr_algo_all_pairs_cflr_tool_runner.py b/cli/runners/pocr_algo_all_pairs_cflr_tool_runner.py new file mode 100644 index 0000000..9769191 --- /dev/null +++ b/cli/runners/pocr_algo_all_pairs_cflr_tool_runner.py @@ -0,0 +1,25 @@ +import re +import subprocess +from typing import Optional + +from cli.runners.all_pairs_cflr_tool_runner import AbstractAllPairsCflrToolRunner, CflrToolRunResult + + +class PocrAllPairsCflrToolRunner(AbstractAllPairsCflrToolRunner): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def base_command(self) -> Optional[str]: + return ( + f'{self.grammar_path.stem} -pocr {self.graph_path}' + if self.grammar_path.stem in {"aa", 
"vf"} + else f'cfl -pocr {self.grammar_path} {self.graph_path}' + ) + + def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult: + return CflrToolRunResult( + s_edges=int(re.search(r"#(SEdges|CountEdges)\s+(\d+)", process.stdout).group(2)), + time_sec=float(re.search(r"AnalysisTime\s+([\d.]+)", process.stdout).group(1)), + ram_kb=self.parse_ram_usage_kb(process) + ) diff --git a/cli/runners/py_algo_all_pairs_cflr_tool_runner.py b/cli/runners/py_algo_all_pairs_cflr_tool_runner.py new file mode 100644 index 0000000..a629062 --- /dev/null +++ b/cli/runners/py_algo_all_pairs_cflr_tool_runner.py @@ -0,0 +1,21 @@ +import re +import subprocess +from typing import Optional + +from cli.runners.all_pairs_cflr_tool_runner import AbstractAllPairsCflrToolRunner, CflrToolRunResult + + +class PyAlgoAllPairsCflrToolRunner(AbstractAllPairsCflrToolRunner): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def base_command(self) -> Optional[str]: + return f"python3 -m cli.run_all_pairs_cflr {self.algo_settings} {self.graph_path} {self.grammar_path}" + + def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult: + return CflrToolRunResult( + s_edges=int(re.search(r"#(SEdges|CountEdges)\s+(\d+)", process.stdout).group(2)), + time_sec=float(re.search(r"AnalysisTime\s+([\d.]+)", process.stdout).group(1)), + ram_kb=self.parse_ram_usage_kb(process) + )