Skip to content

Commit

Permalink
Renaming the transform to code-profiler from syntactic-concept-extractor
Browse files Browse the repository at this point in the history
Signed-off-by: Pankaj Thorat <[email protected]>
  • Loading branch information
pankajskku committed Oct 15, 2024
1 parent 892283b commit 39158a5
Show file tree
Hide file tree
Showing 549 changed files with 96 additions and 392 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files
#
name: Test - transforms/code/syntactic_concept_extractor
name: Test - transforms/code/code_profiler

on:
workflow_dispatch:
Expand All @@ -12,9 +12,9 @@ on:
tags:
- "*"
paths:
- "transforms/code/syntactic_concept_extractor/**"
- "transforms/code/code_profiler/**"
- "data-processing-lib/**"
- "!transforms/code/syntactic_concept_extractor/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!transforms/code/code_profiler/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
Expand All @@ -26,9 +26,9 @@ on:
- "dev"
- "releases/**"
paths:
- "transforms/code/syntactic_concept_extractor/**"
- "transforms/code/code_profiler/**"
- "data-processing-lib/**"
- "!transforms/code/syntactic_concept_extractor/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!transforms/code/code_profiler/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
Expand Down Expand Up @@ -72,12 +72,12 @@ jobs:
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform source in transforms/code/syntactic_concept_extractor
- name: Test transform source in transforms/code/code_profiler
run: |
if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then
make -C transforms/code/syntactic_concept_extractor DOCKER=docker test-src
if [ -e "transforms/code/code_profiler/Makefile" ]; then
make -C transforms/code/code_profiler DOCKER=docker test-src
else
echo "transforms/code/syntactic_concept_extractor/Makefile not found - source testing disabled for this transform."
echo "transforms/code/code_profiler/Makefile not found - source testing disabled for this transform."
fi
test-image:
needs: [check_if_push_image]
Expand All @@ -99,15 +99,15 @@ jobs:
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform image in transforms/code/syntactic_concept_extractor
- name: Test transform image in transforms/code/code_profiler
run: |
if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then
if [ -d "transforms/code/syntactic_concept_extractor/spark" ]; then
if [ -e "transforms/code/code_profiler/Makefile" ]; then
if [ -d "transforms/code/code_profiler/spark" ]; then
make -C data-processing-lib/spark DOCKER=docker image
fi
make -C transforms/code/syntactic_concept_extractor DOCKER=docker test-image
make -C transforms/code/code_profiler DOCKER=docker test-image
else
echo "transforms/code/syntactic_concept_extractor/Makefile not found - testing disabled for this transform."
echo "transforms/code/code_profiler/Makefile not found - testing disabled for this transform."
fi
- name: Print space
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
Expand All @@ -117,8 +117,8 @@ jobs:
- name: Publish images
if: needs.check_if_push_image.outputs.publish_images == 'true'
run: |
if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then
make -C transforms/code/syntactic_concept_extractor publish
if [ -e "transforms/code/code_profiler/Makefile" ]; then
make -C transforms/code/code_profiler publish
else
echo "transforms/code/syntactic_concept_extractor/Makefile not found - publishing disabled for this transform."
echo "transforms/code/code_profiler/Makefile not found - publishing disabled for this transform."
fi
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Syntactic Construct Extractor
# Code Profiler Transform

This module extracts the base syntactic concepts from multi-language source code and represents these concepts in a unified, language-agnostic representation that can be further used for multi-language data profiling. While programming languages expose similar syntactic building blocks to represent programming intent, such as importing packages/libraries, functions, classes, loops, conditionals, comments and others, these concepts are expressed through language-specific grammar, defined by distinct keywords and syntactic form. Our framework abstracts language-specific concepts by transforming them into a unified, language-agnostic representation called universal base syntactic representation (UBSR), referred to as a concept, which is consistently encoded within the proposed schema structure. The current version supports the base syntactic concepts for importing/including packages/libraries, comments, and functions.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ COPY --chown=dpk:root README.md README.md
RUN pip install --no-cache-dir -e .

# copy the main() entry point to the image
COPY ./src/syntactic_concept_extractor_transform_python.py .
COPY ./src/code_profiler_transform_python.py .

# copy some of the samples in
COPY ./src/syntactic_concept_extractor_local.py local/
COPY ./src/code_profiler_local.py local/

# Copy the tree-sitter bindings (this is the important part)
COPY --chown=ray:users ../../input/tree-sitter-bindings/ /home/dpk/input/tree-sitter-bindings/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ publish-image:: .transforms.publish-image-python
setup:: .transforms.setup

set-versions:
$(MAKE) TRANSFORM_PYTHON_VERSION=$(SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION) TOML_VERSION=$(SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION) .transforms.set-versions
$(MAKE) TRANSFORM_PYTHON_VERSION=$(CODE_PROFILER_PYTHON_VERSION) TOML_VERSION=$(CODE_PROFILER_PYTHON_VERSION) .transforms.set-versions

build-dist:: .defaults.build-dist

Expand All @@ -50,6 +50,6 @@ RUN_ARGS ?= ""
run-local-sample: .transforms.run-local-sample

run-local-python-sample:
$(MAKE) RUN_FILE=syntactic_concept_extractor_local_python.py \
$(MAKE) RUN_FILE=code_profiler_local_python.py \
RUN_ARGS="--content 'Contents' --language 'Language'" \
.transforms.run-local-python-sample
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Base Syntactic Concept Extractor Transform
# Code Profiler Transform


## Configuration and command line Options

The set of dictionary keys holding [SyntacticConceptExtractorTransform](src/syntactic_concept_extractor_transform.py)
The dictionary keys used to configure [CodeProfilerTransform](src/code_profiler_transform.py)
are as follows:

* content - specifies the column name in the dataframe that has the code snippet
Expand All @@ -19,8 +19,8 @@ the [python launcher](../../../../data-processing-lib/doc/python-launcher-option
### Running the samples
To run the samples, use the following `make` targets

* `run-local-sample` - runs src/syntactic_concept_extractor_local.py
* `run-local-python-sample` - runs src/syntactic_concept_extractor_local_python.py
* `run-local-sample` - runs src/code_profiler_local.py
* `run-local-python-sample` - runs src/code_profiler_local_python.py

These targets will activate the virtual environment and set up any configuration needed.
Use the `-n` option of `make` to see the detail of what is done to run the sample.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[project]
name = "dpk_syntactic_concept_extractor_transform_python"
name = "dpk_code_profiler_transform_python"
version = "0.2.2.dev0"
requires-python = ">=3.10"
description = "Syntactic Concept Extractor Python Transform"
description = "Code Profiler Python Transform"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
authors = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


from data_processing.data_access import DataAccessLocal
from syntactic_concept_extractor_transform import SyntacticConceptExtractorTransform
from code_profiler_transform import CodeProfilerTransform

# create parameters
input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../", "../", "input"))
Expand Down Expand Up @@ -46,10 +46,10 @@ def save_tables_to_parquet(table_list, output_folder, base_filename):
print(f"Table {idx} saved to {output_file}")

if __name__ == "__main__":
print("Syntactic constructs extraction started")
print("Code profiling started")
# Here we show how to run outside of the runtime
# Create and configure the transform.
profiler = SyntacticConceptExtractorTransform(profiler_params)
profiler = CodeProfilerTransform(profiler_params)
# Use the local data access to read a parquet table.
data_access = DataAccessLocal()
table, other_val = data_access.get_table(os.path.join(input_folder, profiler_params.get("input")))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from data_processing.runtime.pure_python import PythonTransformLauncher
from data_processing.utils import ParamsUtils
from syntactic_concept_extractor_transform_python import SyntacticConceptExtractorPythonTransformConfiguration
from code_profiler_transform_python import CodeProfilerPythonTransformConfiguration

# create parameters
input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../", "../", "input"))
Expand All @@ -37,10 +37,10 @@
if __name__ == "__main__":
# Set the simulated command line args
sys.argv = ParamsUtils.dict_to_req(d=params)
print("In syntactic_concept_extractor_local_python", sys.argv)
print("In code_profiler_local_python", sys.argv)
# create launcher
launcher = PythonTransformLauncher(runtime_config=SyntacticConceptExtractorPythonTransformConfiguration())
print("In syntactic_concept_extractor_local_python: Launcher created")
launcher = PythonTransformLauncher(runtime_config=CodeProfilerPythonTransformConfiguration())
print("In code_profiler_local_python: Launcher created")

# Launch the pure-python runtime to process the input
launcher.launch()
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@
get_logger,
)

short_name = "SyntacticConceptExtractor"
short_name = "CodeProfiler"
cli_prefix = f"{short_name}_"
language_key = "language"
contents_key = "contents"
language_cli_param = f"{cli_prefix}{language_key}"
contents_cli_param = f"{cli_prefix}{contents_key}"

class SyntacticConceptExtractorTransform(AbstractTableTransform):
class CodeProfilerTransform(AbstractTableTransform):
"""
Implements a simple copy of a pyarrow Table.
"""
Expand Down Expand Up @@ -182,8 +182,8 @@ def get_uast_parquet():

return [table_with_uast], stats

class SyntacticConceptExtractorTransformConfiguration(TransformConfiguration):
def __init__(self, transform_class: type[AbstractBinaryTransform] = SyntacticConceptExtractorTransform):
class CodeProfilerTransformConfiguration(TransformConfiguration):
def __init__(self, transform_class: type[AbstractBinaryTransform] = CodeProfilerTransform):
super().__init__(
name=short_name,
transform_class=transform_class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@

import time

from syntactic_concept_extractor_transform import (
SyntacticConceptExtractorTransform,
SyntacticConceptExtractorTransformConfiguration,
from code_profiler_transform import (
CodeProfilerTransform,
CodeProfilerTransformConfiguration,
)
from data_processing.runtime.pure_python import PythonTransformLauncher
from data_processing.runtime.pure_python.runtime_configuration import (
Expand All @@ -25,7 +25,7 @@
logger = get_logger(__name__)


class SyntacticConceptExtractorPythonTransformConfiguration(PythonTransformRuntimeConfiguration):
class CodeProfilerPythonTransformConfiguration(PythonTransformRuntimeConfiguration):
"""
Implements the PythonTransformConfiguration for NOOP as required by the PythonTransformLauncher.
NOOP does not use a RayRuntime class so the superclass only needs the base
Expand All @@ -37,11 +37,11 @@ def __init__(self):
Initialization
:param base_configuration - base configuration class
"""
super().__init__(transform_config=SyntacticConceptExtractorTransformConfiguration(transform_class=SyntacticConceptExtractorTransform))
super().__init__(transform_config=CodeProfilerTransformConfiguration(transform_class=CodeProfilerTransform))

if __name__ == "__main__":
# launcher = NOOPRayLauncher()
print("In syntactic_concept_extractor_transform_python")
launcher = PythonTransformLauncher(SyntacticConceptExtractorPythonTransformConfiguration())
logger.info("Launching SyntacticConceptExtractor transform")
print("In code_profiler_transform_python")
launcher = PythonTransformLauncher(CodeProfilerPythonTransformConfiguration())
logger.info("Launching CodeProfiler transform")
launcher.launch()
Loading

0 comments on commit 39158a5

Please sign in to comment.