-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #225 from mlcommons/repository_check
- Loading branch information
Showing
5 changed files
with
156 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# MLPerf repository checker | ||
|
||
MLPerf repository checker | ||
|
||
## Usage | ||
|
||
To check whether an organization's submission package is compatible with GitHub, | ||
and whether it will cause any problems when added to GitHub with a PR during the | ||
review process, run: | ||
|
||
```sh | ||
python3 -m mlperf_logging.repo_checker FOLDER USAGE RULESET | ||
``` | ||
|
||
Currently, only USAGE "training" and RULESET "2.0.0" are supported. | ||
|
||
The repo checker checks: | ||
1. Whether the repo contains filenames that github does not like, e.g. files with spaces, | ||
files that start with '.' or '/.' | ||
2. Files that violate the github file limit (50MB) | ||
|
||
## Tested software versions | ||
Tested and confirmed working using the following software versions: | ||
|
||
Python 3.9.9 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from . import repo_checker | ||
|
||
# Run the repository checker's command-line entry point.
# The value returned by main() is discarded here.
repo_checker.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import argparse | ||
import logging | ||
import os | ||
import subprocess | ||
|
||
|
||
def _check_bad_filenames(submission_dir):
    """Check a submission tree for file names that git handles badly.

    Walks ``submission_dir`` recursively and flags every file whose own
    name starts with "." or contains a space.

    Only the base name of each file is inspected; directory names are not
    checked, so a regular file inside a hidden directory is not flagged.

    Args:
        submission_dir: Root folder of the submission package.

    Returns:
        True when no offending file name was found, False otherwise.
        Offending paths are reported through ``logging.error``.
    """
    logging.info('Running git-unfriendly file name checks.')
    names = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(submission_dir)
        for filename in filenames
        # os.walk yields base names, which never contain a path separator,
        # so a '"/." in filename' test could never match and is omitted.
        if filename.startswith(".") or " " in filename
    ]
    if names:
        logging.error('Files with git-unfriendly name: %s ', "\n".join(names))
        logging.error('Please remove spaces from filenames and make sure they do not start with ".", or "/."')
        return False
    return True
|
||
|
||
def _check_file_sizes(submission_dir, max_bytes=50 * 1024 * 1024):
    """Check a submission tree for files larger than the allowed size.

    Walks ``submission_dir`` recursively in pure Python instead of shelling
    out to ``find``, so a missing or failing external tool can no longer
    make the check pass silently.

    Args:
        submission_dir: Root folder of the submission package.
        max_bytes: Largest allowed file size in bytes. Defaults to 50 MiB,
            the same threshold as ``find -size +50M``.

    Returns:
        True when no regular file exceeds ``max_bytes``, False otherwise.
        Offending paths are reported through ``logging.error``.
    """
    logging.info('Running large file checks.')
    oversized = []
    for dirpath, _, filenames in os.walk(submission_dir):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            # Like `find -type f`: consider regular files only, not symlinks.
            if os.path.islink(path) or not os.path.isfile(path):
                continue
            if os.path.getsize(path) > max_bytes:
                oversized.append(path)

    if oversized:
        limit_mb = max_bytes / (1024 * 1024)
        # Sorted so the report is stable across runs and filesystems.
        logging.error('Files > %gMB: %s', limit_mb, "\n".join(sorted(oversized)))
        logging.error('Please remove or reduce the size of these files.')
        return False
    return True
|
||
|
||
def run_checks(submission_dir):
    """Run every repository check on a submission folder.

    Individual checkers are called from this function.

    Args:
        submission_dir: Root folder of the submission package.

    Returns:
        True when all checks pass, False when at least one check fails.
    """
    logging.info('Running repository checks.')

    # Run both checkers unconditionally so one invocation reports every
    # problem instead of stopping at the first failing check.
    filenames_ok = _check_bad_filenames(submission_dir)
    file_sizes_ok = _check_file_sizes(submission_dir)

    if not (filenames_ok and file_sizes_ok):
        logging.info('CHECKS FAILED.')
        return False

    logging.info('ALL CHECKS PASSED.')
    return True
|
||
|
||
def get_parser():
    """Build the command-line parser for the repository checker.

    Positional arguments are ``folder``, ``usage`` (only 'training') and
    ``ruleset`` (only '2.0.0'); ``--log_output`` optionally names the log
    file and defaults to 'repo_checker.log'.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog='mlperf_logging.repo_checker',
        description='Sanity checks to make sure that package is github compliant.',
    )

    parser.add_argument(
        'folder',
        type=str,
        help='the folder for a submission package.',
    )
    parser.add_argument(
        'usage',
        type=str,
        choices=['training'],
        help='the usage -- only training is currently supported.',
    )
    parser.add_argument(
        'ruleset',
        type=str,
        choices=['2.0.0'],
        help='the ruleset. Only 2.0.0 is currently supported.'
    )
    parser.add_argument(
        '--log_output',
        type=str,
        default='repo_checker.log',
        help='the file to write the checker log to.'
    )
    return parser
|
||
|
||
def main():
    """Command-line entry point: parse arguments, set up logging, run checks.

    Logging goes through the root logger at INFO level, configured with the
    ``--log_output`` file and an additional stream handler. The first two
    root handlers get the "LEVEL - message" format.

    Returns:
        The value returned by ``run_checks`` for the given folder.
    """
    args = get_parser().parse_args()

    root_logger = logging.getLogger()
    logging.basicConfig(filename=args.log_output, level=logging.INFO)
    root_logger.addHandler(logging.StreamHandler())

    log_format = logging.Formatter("%(levelname)s - %(message)s")
    for handler in root_logger.handlers[:2]:
        handler.setFormatter(log_format)

    return run_checks(args.folder)
|
||
# Run the checker when this module is executed directly as a script.
# The value returned by main() is not turned into an exit status here.
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,18 @@ | ||
#!/bin/bash

# Run the MLPerf training 2.0.0 checks on a submission folder.
# $1 is the top-level submission directory.

set -e

# rcp_bypass and rcp_bert_train_samples package checker params
# need to be retrieved from the package_checker_params file at the
# top-level submission dir. Each non-empty line becomes one "--<line>" option.
PACKAGE_CHECKER_PARAMS=""
PACKAGE_CHECKER_PARAMS_FILE="$1/package_checker_params"
if test -f "$PACKAGE_CHECKER_PARAMS_FILE"; then
    # '|| [ -n "$line" ]' keeps a final line that has no trailing newline.
    while IFS= read -r line || [ -n "$line" ]
    do
        # Skip blank lines so they do not turn into a bare "--" argument.
        if [ -n "$line" ]; then
            PACKAGE_CHECKER_PARAMS="$PACKAGE_CHECKER_PARAMS --$line"
        fi
    done < "$PACKAGE_CHECKER_PARAMS_FILE"
fi

# The package checker runs once, with the collected params. Because of
# "set -e", an earlier run without them could abort the script before the
# params were ever applied.
# $PACKAGE_CHECKER_PARAMS is intentionally unquoted so it splits into options.
python3 -m mlperf_logging.package_checker "$1" training 2.0.0 $PACKAGE_CHECKER_PARAMS
python3 -m mlperf_logging.result_summarizer "$1" training 2.0.0
python3 -m mlperf_logging.repo_checker "$1" training 2.0.0