From 1eaea3d4a4d08011434480b981e2873f40d20384 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:19:32 -0800 Subject: [PATCH 01/86] flake8 and validate path --- argschema/fields/files.py | 5 +++++ argschema/utils.py | 4 ---- test/fields/test_numpyarray.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index cdc3d98b..d833b655 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -117,6 +117,11 @@ def _validate(self, value): # use outputfile to test that a file in this location is a valid path validate_outpath(value) +def validate_input_path(value): + if not os.path.isfile(value): + raise mm.ValidationError("%s is not a file" % value) + elif not os.access(value, os.R_OK): + raise mm.ValidationError("%s is not readable" % value) def validate_input_path(value): if not os.path.isfile(value): diff --git a/argschema/utils.py b/argschema/utils.py index e7b81051..5d7144eb 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -403,12 +403,10 @@ def load(schema, d): schema that you want to use to validate d: dict dictionary to validate and load - Returns ------- dict deserialized and validated dictionary - Raises ------ marshmallow.ValidationError @@ -433,12 +431,10 @@ def dump(schema, d): schema that you want to use to validate and dump d: dict dictionary to validate and dump - Returns ------- dict serialized and validated dictionary - Raises ------ marshmallow.ValidationError diff --git a/test/fields/test_numpyarray.py b/test/fields/test_numpyarray.py index 6dddafb9..d44723af 100644 --- a/test/fields/test_numpyarray.py +++ b/test/fields/test_numpyarray.py @@ -1,7 +1,7 @@ import pytest from argschema import ArgSchemaParser, ArgSchema from argschema.fields import NumpyArray -from argschema.utils import dump +from argschema.utils import load,dump import marshmallow as mm import numpy as np From 034cb74199ff956353e6376536dac4875f669190 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 23 Apr 2018 07:49:14 -0700 Subject: [PATCH 02/86] flake8 warning cleanup --- argschema/fields/files.py | 1 + argschema/utils.py | 8 -------- test/fields/test_numpyarray.py | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index d833b655..fd30a406 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -117,6 +117,7 @@ def _validate(self, value): # use outputfile to test that a file in this location is a valid path validate_outpath(value) + def validate_input_path(value): if not os.path.isfile(value): raise mm.ValidationError("%s is not a file" % value) diff --git a/argschema/utils.py b/argschema/utils.py index 5d7144eb..d365715c 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -407,10 +407,6 @@ def load(schema, d): ------- dict deserialized and validated dictionary - Raises - ------ - marshmallow.ValidationError - if the dictionary does not conform to the schema """ results = schema.load(d) @@ -435,10 +431,6 @@ def dump(schema, d): ------- dict serialized and validated dictionary - Raises - ------ - marshmallow.ValidationError - if the dictionary does not conform to the schema """ results = schema.dump(d) diff --git a/test/fields/test_numpyarray.py b/test/fields/test_numpyarray.py index d44723af..6dddafb9 100644 --- a/test/fields/test_numpyarray.py +++ b/test/fields/test_numpyarray.py @@ -1,7 +1,7 @@ import pytest from argschema import ArgSchemaParser, ArgSchema from argschema.fields import 
NumpyArray -from argschema.utils import load,dump +from argschema.utils import dump import marshmallow as mm import numpy as np From 4d9d68ae65ed6f794d82a4e5a7a16771537193cc Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 23 Apr 2018 08:22:03 -0700 Subject: [PATCH 03/86] bump to 2.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f4d46ee8..9c5b51c9 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ test_required = f.read().splitlines() setup(name='argschema', - version='1.17.6', + version='2.0.0', description=' a wrapper for setting up modules that can have parameters specified by command line arguments,\ json_files, or dictionary objects. Providing a common wrapper for data processing modules.', author='Forrest Collman,David Feng', From afa3e30ea09e321f0a42e044d115a44db061a997 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 23 Apr 2018 08:56:31 -0700 Subject: [PATCH 04/86] removing deprecated --- argschema/__init__.py | 8 ++--- argschema/argschema_parser.py | 56 --------------------------------- argschema/deprecated.py | 12 ------- argschema/fields/__init__.py | 15 +++++---- argschema/fields/deprecated.py | 34 -------------------- argschema/fields/numpyarrays.py | 2 -- argschema/utils.py | 18 +---------- docs/api/argschema.rst | 8 ----- docs/tests/fields.rst | 8 ----- docs/user/intro.rst | 19 ----------- examples/cli_example.py | 2 -- examples/deprecated_example.py | 15 --------- test/fields/test_deprecated.py | 25 --------------- test/test_cli_overrides.py | 26 +-------------- test/test_first_test.py | 24 +------------- test/test_utils.py | 11 +++---- 16 files changed, 17 insertions(+), 266 deletions(-) delete mode 100644 argschema/deprecated.py delete mode 100644 argschema/fields/deprecated.py delete mode 100644 examples/deprecated_example.py delete mode 100644 test/fields/test_deprecated.py diff --git a/argschema/__init__.py b/argschema/__init__.py index 984ab7a6..3f4c03e3 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -1,9 +1,7 @@ '''argschema: flexible definition, validation and setting of parameters''' -from .fields import InputFile, InputDir, OutputFile, OptionList # noQA:F401 -from .schemas import ArgSchema # noQA:F401 -from .argschema_parser import ArgSchemaParser # noQA:F401 -from .deprecated import JsonModule, ModuleParameters # noQA:F401 - +from .fields import InputFile, InputDir, OutputFile +from .schemas import ArgSchema +from .argschema_parser import ArgSchemaParser def main(): # pragma: no cover jm = ArgSchemaParser() diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 01ae12b9..6edb4ee8 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -3,7 +3,6 @@ ''' import json import logging -import copy from . import schemas from . import utils from . 
import fields @@ -66,47 +65,6 @@ def is_recursive_schema(schema, schema_list=[]): return False -def fill_defaults(schema, args): - """DEPRECATED, function to fill in default values from schema into args - bug: goes into an infinite loop when there is a recursively defined schema - - Parameters - ---------- - schema : marshmallow.Schema - schema to get defaults from - args : - - - Returns - ------- - dict - dictionary with missing default values filled in - - """ - - defaults = [] - - # find all of the schema entries with default values - schemata = [(schema, [])] - while schemata: - subschema, path = schemata.pop() - for k, v in subschema.declared_fields.items(): - if isinstance(v, mm.fields.Nested): - schemata.append((v.schema, path + [k])) - elif v.default != mm.missing: - defaults.append((path + [k], v.default)) - - # put the default entries into the args dictionary - args = copy.deepcopy(args) - for path, val in defaults: - d = args - for path_item in path[:-1]: - d = d.setdefault(path_item, {}) - if path[-1] not in d: - d[path[-1]] = val - return args - - class ArgSchemaParser(object): """The main class you should sub-class to write your own argschema module. Takes input_data, reference to a input_json and the command line inputs and parses out the parameters @@ -256,20 +214,6 @@ def load_schema_with_defaults(self, schema, args): because these won't work with loading defaults. """ - is_recursive = is_recursive_schema(schema) - is_non_default = contains_non_default_schemas(schema) - if (not is_recursive) and is_non_default: - # throw a warning - self.logger.warning("""DEPRECATED:You are using a Schema which contains - a Schema which is not subclassed from argschema.DefaultSchema, - default values will not work correctly in this case, - this use is deprecated, and future versions will not fill in default - values when you use non-DefaultSchema subclasses""") - args = fill_defaults(schema, args) - if is_recursive and is_non_default: - raise mm.ValidationError( - 'Recursive schemas need to subclass argschema.DefaultSchema else defaults will not work') - # load the dictionary via the schema result = utils.load(schema, args) diff --git a/argschema/deprecated.py b/argschema/deprecated.py deleted file mode 100644 index 6d8ce615..00000000 --- a/argschema/deprecated.py +++ /dev/null @@ -1,12 +0,0 @@ -from .argschema_parser import ArgSchemaParser -from .schemas import ArgSchema - - -class JsonModule(ArgSchemaParser): - """deprecated name of ArgSchemaParser""" - pass - - -class ModuleParameters(ArgSchema): - """deprecated name of ArgSchema""" - pass diff --git a/argschema/fields/__init__.py b/argschema/fields/__init__.py index c752233c..5adb5886 100644 --- a/argschema/fields/__init__.py +++ b/argschema/fields/__init__.py @@ -1,14 +1,13 @@ '''sub-module for custom marshmallow fields of general utility''' -from marshmallow.fields import * # noQA:F401 -from marshmallow.fields import __all__ as __mmall__ # noQA:F401 -from .files import OutputFile, InputDir, InputFile, OutputDir # noQA:F401 -from .numpyarrays import NumpyArray # noQA:F401 -from .deprecated import OptionList # noQA:F401 -from .loglevel import LogLevel # noQA:F401 -from .slice import Slice # noQA:F401 +from marshmallow.fields import * +from marshmallow.fields import __all__ as __mmall__ +from .files import OutputFile, InputDir, InputFile, OutputDir +from .numpyarrays import NumpyArray +from .loglevel import LogLevel +from .slice import Slice __all__ = __mmall__ + ['OutputFile', 'InputDir', 'InputFile', 'OutputDir', - 'NumpyArray', 
'OptionList', 'LogLevel', 'Slice'] + 'NumpyArray','LogLevel', 'Slice'] # Python 2 subpackage (not module) * imports break if items in __all__ # are unicode. diff --git a/argschema/fields/deprecated.py b/argschema/fields/deprecated.py deleted file mode 100644 index 717b876b..00000000 --- a/argschema/fields/deprecated.py +++ /dev/null @@ -1,34 +0,0 @@ -'''marshmallow fields related to choosing amongst a set of options''' -import marshmallow as mm -import logging -logger = logging.getLogger('argschema') - - -class OptionList(mm.fields.Field): - """OptionList is a marshmallow field which enforces that this field - is one of a finite set of options. - OptionList(options,*args,**kwargs) where options is a list of - json compatible options which this option will be enforced to belong - - Parameters - ---------- - options : list - A list of python objects of which this field must be one of - kwargs : dict - the same as any :class:`Field` receives - """ - - def __init__(self, options, **kwargs): - self.options = options - logger.warning( - 'DEPRECATED: use validate=mm.validate.OneOf([a,b,c...]) in field definition instead') - super(OptionList, self).__init__(**kwargs) - - def _serialize(self, value, attr, obj): - return value - - def _validate(self, value): - if value not in self.options: - raise mm.ValidationError("%s is not a valid option" % value) - - return value diff --git a/argschema/fields/numpyarrays.py b/argschema/fields/numpyarrays.py index 6f9969ed..b4382b4c 100644 --- a/argschema/fields/numpyarrays.py +++ b/argschema/fields/numpyarrays.py @@ -20,8 +20,6 @@ class NumpyArray(mm.fields.List): def __init__(self, dtype=None, *args, **kwargs): self.dtype = dtype - if "cli_as_single_argument" not in kwargs: - kwargs["cli_as_single_argument"] = True super(NumpyArray, self).__init__(mm.fields.Field, *args, **kwargs) def _deserialize(self, value, attr, obj): diff --git a/argschema/utils.py b/argschema/utils.py index d365715c..5ffe3af1 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -2,7 +2,6 @@ marshmallow schemas to argparse and merging dictionaries from both systems ''' import logging -import warnings import ast import argparse from operator import add @@ -55,11 +54,7 @@ def get_type_from_field(field): callable Function to call to cast argument to """ - if (isinstance(field, fields.List) and - not field.metadata.get("cli_as_single_argument", False)): - return list - else: - return FIELD_TYPE_MAP.get(type(field), str) + return FIELD_TYPE_MAP.get(type(field), str) def cli_error_dict(arg_path, field_type, index=0): @@ -335,17 +330,6 @@ def build_schema_arguments(schema, arguments=None, path=None, description=None): if isinstance(validator, mm.validate.OneOf): arg['help'] += " (valid options are {})".format(validator.choices) - if (isinstance(field, mm.fields.List) and - not field.metadata.get("cli_as_single_argument", False)): - warn_msg = ("'{}' is using old-style command-line syntax with " - "each element as a separate argument. This will " - "not be supported in argschema after " - "2.0. 
See http://argschema.readthedocs.io/en/" - "master/user/intro.html#command-line-specification" - " for details.").format(arg_name) - warnings.warn(warn_msg, FutureWarning) - arg['nargs'] = '*' - # do type mapping after parsing so we can raise validation errors arg['type'] = str diff --git a/docs/api/argschema.rst b/docs/api/argschema.rst index a8660473..a17ec924 100644 --- a/docs/api/argschema.rst +++ b/docs/api/argschema.rst @@ -19,14 +19,6 @@ argschema\.argschema\_parser module :undoc-members: :show-inheritance: -argschema\.deprecated module ----------------------------- - -.. automodule:: argschema.deprecated - :members: - :undoc-members: - :show-inheritance: - argschema\.schemas module ------------------------- diff --git a/docs/tests/fields.rst b/docs/tests/fields.rst index 933bb8b2..d5c65a9d 100644 --- a/docs/tests/fields.rst +++ b/docs/tests/fields.rst @@ -4,14 +4,6 @@ fields package Submodules ---------- -fields\.test\_deprecated module -------------------------------- - -.. automodule:: fields.test_deprecated - :members: - :undoc-members: - :show-inheritance: - fields\.test\_files module -------------------------- diff --git a/docs/user/intro.rst b/docs/user/intro.rst index c8235309..04138f7c 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -158,25 +158,6 @@ passed by the shell. If there are spaces in the value, it will need to be wrapped in quotes, and any special characters will need to be escaped with \. Booleans are set with True or 1 for true and False or 0 for false. -An exception to this rule is list formatting. If a schema contains a -:class:`~marshmallow.fields.List` and does not set the -`cli_as_single_argument` keyword argument to True, lists will be parsed -as `--list_name ...`. In argschema 2.0 lists will be -parsed in the same way as other arguments, as it allows more flexibility -in list types and more clearly represents the intended data structure. - -An example script showing old and new list settings: - -.. literalinclude:: ../../examples/deprecated_example.py - :caption: deprecated_example.py - -Running this code can demonstrate the differences in command-line usage: - -.. command-output:: python deprecated_example.py --help - :cwd: /../examples - -.. 
command-output:: python deprecated_example.py --list_old 9.1 8.2 7.3 --list_new [6.4,5.5,4.6] - :cwd: /../examples We can explore some typical examples of command line usage with the following script: diff --git a/examples/cli_example.py b/examples/cli_example.py index 5a71323e..b7cb6968 100644 --- a/examples/cli_example.py +++ b/examples/cli_example.py @@ -13,10 +13,8 @@ class MySchema(ArgSchema): description="my example array") string_list = List(List(Str), default=[["hello", "world"], ["lists!"]], - cli_as_single_argument=True, description="list of lists of strings") int_list = List(Int, default=[1, 2, 3], - cli_as_single_argument=True, description="list of ints") nested = Nested(MyNestedSchema, required=True) diff --git a/examples/deprecated_example.py b/examples/deprecated_example.py deleted file mode 100644 index bea1e12c..00000000 --- a/examples/deprecated_example.py +++ /dev/null @@ -1,15 +0,0 @@ -from argschema import ArgSchema, ArgSchemaParser -from argschema.fields import List, Float - - -class MySchema(ArgSchema): - list_old = List(Float, default=[1.1, 2.2, 3.3], - description="float list with deprecated cli") - list_new = List(Float, default=[4.4, 5.5, 6.6], - cli_as_single_argument=True, - description="float list with supported cli") - - -if __name__ == '__main__': - mod = ArgSchemaParser(schema_type=MySchema) - print(mod.args) diff --git a/test/fields/test_deprecated.py b/test/fields/test_deprecated.py deleted file mode 100644 index a678074b..00000000 --- a/test/fields/test_deprecated.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest -from argschema import ArgSchemaParser, ArgSchema -from argschema.fields import OptionList -import marshmallow as mm - - -class OptionSchema(ArgSchema): - a = OptionList([1, 2, 3], required=True, description='one of 1,2,3') - - -def test_option_list(): - input_data = { - 'a': 1 - } - ArgSchemaParser( - input_data=input_data, schema_type=OptionSchema, args=[]) - - -def test_bad_option(): - input_data = { - 'a': 4 - } - with pytest.raises(mm.ValidationError): - ArgSchemaParser( - input_data=input_data, schema_type=OptionSchema, args=[]) diff --git a/test/test_cli_overrides.py b/test/test_cli_overrides.py index bd63996d..d2240908 100644 --- a/test/test_cli_overrides.py +++ b/test/test_cli_overrides.py @@ -48,7 +48,6 @@ def test_data(inputdir, inputfile, outputdir, outputfile): "inputfile": str(inputfile), "integer": 10, "list": [300, 200, 800, 1000], - "list_deprecated": [300, 200, 800, 1000], "localdatetime": "0001-01-01T00:00:00", "log_level": "ERROR", "nested": {"a": 1, "b": False}, @@ -67,14 +66,6 @@ def test_data(inputdir, inputfile, outputdir, outputfile): return data -@pytest.fixture -def deprecated_data(): - data = { - "list_deprecated": [300, 200, 800, 1000], - } - return data - - class MyNestedSchema(DefaultSchema): a = fields.Int(required=True) b = fields.Boolean(required=True) @@ -91,7 +82,7 @@ class MySchema(ArgSchema): inputdir = fields.InputDir(required=True) inputfile = fields.InputFile(required=True) integer = fields.Int(required=True) - list = fields.List(fields.Int, required=True, cli_as_single_argument=True) + list = fields.List(fields.Int, required=True) localdatetime = fields.LocalDateTime(required=True) nested = fields.Nested(MyNestedSchema, required=True) number = fields.Number(required=True) @@ -107,10 +98,6 @@ class MySchema(ArgSchema): uuid = fields.UUID(required=True) -class MyDeprecatedSchema(ArgSchema): - list_deprecated = fields.List(fields.Int, required=True) - - def test_unexpected_input(test_data): with 
pytest.raises(SystemExit): ArgSchemaParser(test_data, schema_type=MySchema, @@ -225,17 +212,6 @@ def test_override_list(test_data): args=["--list", "invalid"]) -def test_override_list_deprecated(deprecated_data): - with pytest.warns(FutureWarning): - mod = ArgSchemaParser(deprecated_data, schema_type=MyDeprecatedSchema, - args=["--list_deprecated", "1000", "3000"]) - assert(mod.args["list_deprecated"] == [1000, 3000]) - with pytest.raises(mm.ValidationError): - mod = ArgSchemaParser(deprecated_data, - schema_type=MyDeprecatedSchema, - args=["--list_deprecated", "[1000,3000]"]) - - def test_override_localdatetime(test_data): mod = ArgSchemaParser(test_data, schema_type=MySchema, args=["--localdatetime", "1977-05-04T00:00:00"]) diff --git a/test/test_first_test.py b/test/test_first_test.py index 9b6e3089..b7a94b28 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -125,7 +125,7 @@ def test_simple_extension_write_overwrite(simple_extension_file): def test_simple_extension_write_overwrite_list(simple_extension_file): args = ['--input_json', str(simple_extension_file), - '--test.d', '6', '7', '8', '9'] + '--test.d', "[6,7,8,9]"] mod = ArgSchemaParser(schema_type=SimpleExtension, args=args) assert len(mod.args['test']['d']) == 4 @@ -138,28 +138,6 @@ def test_bad_input_json_argparse(): # TESTS DEMONSTRATING BAD BEHAVIOR OF DEFAULT LOADING -class MyExtensionOld(mm.Schema): - a = mm.fields.Str(description='a string') - b = mm.fields.Int(description='an integer') - c = mm.fields.Int(description='an integer', default=10) - d = mm.fields.List(mm.fields.Int, - description='a list of integers') - - -class SimpleExtensionOld(ArgSchema): - test = mm.fields.Nested(MyExtensionOld, default=None, required=True) - - -def test_simple_extension_old_pass(): - mod = ArgSchemaParser( - input_data=SimpleExtension_example_valid, - schema_type=SimpleExtensionOld, args=[]) - assert mod.args['test']['a'] == 'hello' - assert mod.args['test']['b'] == 1 - assert mod.args['test']['c'] == 10 - assert len(mod.args['test']['d']) == 3 - - class RecursiveSchema(argschema.schemas.DefaultSchema): children = mm.fields.Nested("self", many=True, description='children of this node') diff --git a/test/test_utils.py b/test/test_utils.py index b75b8222..9948790b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -131,11 +131,8 @@ def test_schema_argparser_with_baseball(): schema_type=BaseballSituation, args=[]) parser = utils.schema_argparser(schema) help = parser.format_help() - help = help.replace('\n', '').replace(' ', '') - assert( - '--strikesSTRIKEShowmanystrikes(0-2)(REQUIRED)(validoptionsare[0,1,2])' in help) - assert( - '--bases_occupied[BASES_OCCUPIED[BASES_OCCUPIED...]]whichbasesareoccupied(constrainedlist)(validoptionsare[1,2,3])' in help) - assert( - '--ballsBALLSnumberofballs(0-4)(default=0)(validoptionsare[0,1,2,3])' in help) + help = help.replace('\n','').replace(' ','') + assert('--strikesSTRIKEShowmanystrikes(0-2)(REQUIRED)(validoptionsare[0,1,2])' in help) + assert('--bases_occupiedBASES_OCCUPIEDwhichbasesareoccupied(constrainedlist)(validoptionsare[1,2,3])' in help) + assert('--ballsBALLSnumberofballs(0-4)(default=0)(validoptionsare[0,1,2,3])' in help) assert("--pitcher.numberPITCHER.NUMBERplayer'snumber(mustbe>0)(REQUIRED)" in help) From 5818c5485ff4ebb6f925d84af01896c873ab208b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Thu, 26 Apr 2018 13:10:10 -0700 Subject: [PATCH 05/86] flake8 improvements --- test/test_utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) 
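With the per-element list syntax removed in PATCH 04 above, every List field is now given on the command line as a single bracketed argument, as the updated test_first_test.py and test_cli_overrides.py tests show. A minimal sketch of the resulting usage against argschema 2.0 as patched here; ExampleSchema and its field name are illustrative, not taken from the diffs:

    from argschema import ArgSchema, ArgSchemaParser
    from argschema.fields import Int, List

    class ExampleSchema(ArgSchema):
        # 2.0 drops the cli_as_single_argument switch; bracketed lists are the only syntax
        values = List(Int, default=[1, 2, 3], description="a list of ints")

    # the whole list is one shell token, e.g. --values [4,5,6]
    mod = ArgSchemaParser(schema_type=ExampleSchema,
                          args=["--values", "[4,5,6]"])
    print(mod.args["values"])  # [4, 5, 6]
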
diff --git a/test/test_utils.py b/test/test_utils.py index 9948790b..b75b8222 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -131,8 +131,11 @@ def test_schema_argparser_with_baseball(): schema_type=BaseballSituation, args=[]) parser = utils.schema_argparser(schema) help = parser.format_help() - help = help.replace('\n','').replace(' ','') - assert('--strikesSTRIKEShowmanystrikes(0-2)(REQUIRED)(validoptionsare[0,1,2])' in help) - assert('--bases_occupiedBASES_OCCUPIEDwhichbasesareoccupied(constrainedlist)(validoptionsare[1,2,3])' in help) - assert('--ballsBALLSnumberofballs(0-4)(default=0)(validoptionsare[0,1,2,3])' in help) + help = help.replace('\n', '').replace(' ', '') + assert( + '--strikesSTRIKEShowmanystrikes(0-2)(REQUIRED)(validoptionsare[0,1,2])' in help) + assert( + '--bases_occupied[BASES_OCCUPIED[BASES_OCCUPIED...]]whichbasesareoccupied(constrainedlist)(validoptionsare[1,2,3])' in help) + assert( + '--ballsBALLSnumberofballs(0-4)(default=0)(validoptionsare[0,1,2,3])' in help) assert("--pitcher.numberPITCHER.NUMBERplayer'snumber(mustbe>0)(REQUIRED)" in help) From a970d4863073af992d38ea5279d002c1a4c77c86 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 27 Apr 2018 07:54:47 -0700 Subject: [PATCH 06/86] flake8 changes --- argschema/__init__.py | 8 +++++--- argschema/fields/__init__.py | 15 ++++++++------- test/test_first_test.py | 1 + 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/argschema/__init__.py b/argschema/__init__.py index 3f4c03e3..984ab7a6 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -1,7 +1,9 @@ '''argschema: flexible definition, validation and setting of parameters''' -from .fields import InputFile, InputDir, OutputFile -from .schemas import ArgSchema -from .argschema_parser import ArgSchemaParser +from .fields import InputFile, InputDir, OutputFile, OptionList # noQA:F401 +from .schemas import ArgSchema # noQA:F401 +from .argschema_parser import ArgSchemaParser # noQA:F401 +from .deprecated import JsonModule, ModuleParameters # noQA:F401 + def main(): # pragma: no cover jm = ArgSchemaParser() diff --git a/argschema/fields/__init__.py b/argschema/fields/__init__.py index 5adb5886..c752233c 100644 --- a/argschema/fields/__init__.py +++ b/argschema/fields/__init__.py @@ -1,13 +1,14 @@ '''sub-module for custom marshmallow fields of general utility''' -from marshmallow.fields import * -from marshmallow.fields import __all__ as __mmall__ -from .files import OutputFile, InputDir, InputFile, OutputDir -from .numpyarrays import NumpyArray -from .loglevel import LogLevel -from .slice import Slice +from marshmallow.fields import * # noQA:F401 +from marshmallow.fields import __all__ as __mmall__ # noQA:F401 +from .files import OutputFile, InputDir, InputFile, OutputDir # noQA:F401 +from .numpyarrays import NumpyArray # noQA:F401 +from .deprecated import OptionList # noQA:F401 +from .loglevel import LogLevel # noQA:F401 +from .slice import Slice # noQA:F401 __all__ = __mmall__ + ['OutputFile', 'InputDir', 'InputFile', 'OutputDir', - 'NumpyArray','LogLevel', 'Slice'] + 'NumpyArray', 'OptionList', 'LogLevel', 'Slice'] # Python 2 subpackage (not module) * imports break if items in __all__ # are unicode. 
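The test_utils.py changes in the two patches above exercise utils.schema_argparser, which turns a schema instance into an argparse.ArgumentParser whose help text carries each field's description, default, (REQUIRED) flag, and OneOf choices. A rough sketch of calling it directly; TinySchema is a made-up stand-in for the BaseballSituation schema used by the test, whose definition is not part of these diffs:

    import marshmallow as mm
    from argschema import ArgSchema, utils
    from argschema.fields import Int

    class TinySchema(ArgSchema):
        strikes = Int(required=True, description="how many strikes (0-2)",
                      validate=mm.validate.OneOf([0, 1, 2]))

    parser = utils.schema_argparser(TinySchema())
    # help now reads roughly: --strikes STRIKES  how many strikes (0-2)
    #                         (REQUIRED) (valid options are [0, 1, 2])
    print(parser.format_help())
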
diff --git a/test/test_first_test.py b/test/test_first_test.py index b7a94b28..5d58edf2 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -138,6 +138,7 @@ def test_bad_input_json_argparse(): # TESTS DEMONSTRATING BAD BEHAVIOR OF DEFAULT LOADING + class RecursiveSchema(argschema.schemas.DefaultSchema): children = mm.fields.Nested("self", many=True, description='children of this node') From 0d35f2d0fe9e48a64d0896542d70ae2af4f916b4 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 08:23:35 -0700 Subject: [PATCH 07/86] removing more deprecated items --- argschema/__init__.py | 1 - argschema/fields/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/argschema/__init__.py b/argschema/__init__.py index 984ab7a6..28173c45 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -2,7 +2,6 @@ from .fields import InputFile, InputDir, OutputFile, OptionList # noQA:F401 from .schemas import ArgSchema # noQA:F401 from .argschema_parser import ArgSchemaParser # noQA:F401 -from .deprecated import JsonModule, ModuleParameters # noQA:F401 def main(): # pragma: no cover diff --git a/argschema/fields/__init__.py b/argschema/fields/__init__.py index c752233c..94857d9b 100644 --- a/argschema/fields/__init__.py +++ b/argschema/fields/__init__.py @@ -3,7 +3,6 @@ from marshmallow.fields import __all__ as __mmall__ # noQA:F401 from .files import OutputFile, InputDir, InputFile, OutputDir # noQA:F401 from .numpyarrays import NumpyArray # noQA:F401 -from .deprecated import OptionList # noQA:F401 from .loglevel import LogLevel # noQA:F401 from .slice import Slice # noQA:F401 From 63ed42e6fde460601c221683ea70cc7962b59023 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 08:16:46 -0700 Subject: [PATCH 08/86] adding sources --- argschema/argschema_parser.py | 39 ++++++++++++++++++++-------- argschema/sources/__init__.py | 0 argschema/sources/json_source.py | 27 ++++++++++++++++++++ argschema/sources/pika_source.py | 29 +++++++++++++++++++++ argschema/sources/source.py | 44 ++++++++++++++++++++++++++++++++ argschema/sources/yaml_source.py | 10 ++++++++ test/test_sources.py | 19 ++++++++++++++ 7 files changed, 157 insertions(+), 11 deletions(-) create mode 100644 argschema/sources/__init__.py create mode 100644 argschema/sources/json_source.py create mode 100644 argschema/sources/pika_source.py create mode 100644 argschema/sources/source.py create mode 100644 argschema/sources/yaml_source.py create mode 100644 test/test_sources.py diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 6edb4ee8..e8699701 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -7,7 +7,7 @@ from . import utils from . 
import fields import marshmallow as mm - +from .sources.json_source import JsonSource def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -76,11 +76,15 @@ class ArgSchemaParser(object): Parameters ---------- input_data : dict or None - dictionary parameters instead of --input_json + dictionary parameters to fall back on if all source aren't present schema_type : schemas.ArgSchema the schema to use to validate the parameters output_schema_type : marshmallow.Schema the schema to use to validate the output_json, used by self.output + input_source : argschema.sources.source.Source + a generic source of a dictionary + output_source : argschema.sources.source.Source + a generic output to put output dictionary args : list or None command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing logger_name : str @@ -95,19 +99,23 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None + input_config_map = [ JsonSource ] + output_config_map = [ JsonSource ] def __init__(self, input_data=None, # dictionary input as option instead of --input_json schema_type=None, # schema for parsing arguments output_schema_type=None, # schema for parsing output_json args=None, + input_source = None, + output_source = None, logger_name=__name__): if schema_type is None: schema_type = self.default_schema if output_schema_type is None: output_schema_type = self.default_output_schema - + self.schema = schema_type() self.logger = self.initialize_logger(logger_name, 'WARNING') self.logger.debug('input_data is {}'.format(input_data)) @@ -118,15 +126,24 @@ def __init__(self, argsdict = utils.args_to_dict(argsobj, self.schema) self.logger.debug('argsdict is {}'.format(argsdict)) - if argsobj.input_json is not None: - fields.files.validate_input_path(argsobj.input_json) - with open(argsobj.input_json, 'r') as j: - jsonargs = json.load(j) - else: - jsonargs = input_data if input_data else {} - + #if you received an input_source, get the dictionary from there + if input_source is not None: + input_data = input_source.get_dict() + + #loop over the set of input_configurations to see if the command line arguments include a valid configuration + #for one of them + for InputSource in self.input_config_map: + try: + input_config_d = InputSource.get_config(InputSource.InputConfigSchema,argsdict) + input_source = InputSource(**input_config_d) + input_data = input_source.get_dict() + #if the command line argument dictionary doesn't contain a valid configuration + #simply move on to the next one + except mm.ValidationError as e: + pass + # merge the command line dictionary into the input json - args = utils.smart_merge(jsonargs, argsdict) + args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) # validate with load! 
diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py new file mode 100644 index 00000000..c778482c --- /dev/null +++ b/argschema/sources/json_source.py @@ -0,0 +1,27 @@ +from .source import FileSource +import json +import marshmallow as mm +import argschema + +class JsonInputConfigSchema(mm.Schema): + input_json = argschema.fields.InputFile(required=True, + description = 'filepath to input_json') + +class JsonOutputConfigSchema(mm.Schema): + output_json = argschema.fields.OutputFile(required=True, + description = 'filepath to save output_json') + +class JsonSource(FileSource): + InputConfigSchema = JsonInputConfigSchema + OutputConfigSchema = JsonOutputConfigSchema + def __init__(self,input_json=None, output_json=None): + if input_json is not None: + self.filepath = input_json + if output_json is not None: + self.filepath = output_json + + def read_file(self,fp): + return json.load(fp) + + def write_file(self,fp,d): + json.dump(d,fp) diff --git a/argschema/sources/pika_source.py b/argschema/sources/pika_source.py new file mode 100644 index 00000000..ee9b22a3 --- /dev/null +++ b/argschema/sources/pika_source.py @@ -0,0 +1,29 @@ +from .source import InputSource +import pika +import json + +class PikaJsonSource(InputSource): + + def __init__(self,channel,queue): + """Pika client source for dictionary + + Parameters + ---------- + channel: pika.channel.Channel + pika client channel to connect to + queue: str + queue name to get message from + """ + assert(type(channel)==pika.channel.Channel) + self.channel = channel + self.queue = queue + + def get_dict(self): + method_frame, header_frame, body = self.channel.basic_get(self.queue) + if method_frame: + d = json.loads(body) + self.channel.basic_ack(method_frame.delivery_tag) + return d + + def put_dict(self,d): + \ No newline at end of file diff --git a/argschema/sources/source.py b/argschema/sources/source.py new file mode 100644 index 00000000..41b28dd3 --- /dev/null +++ b/argschema/sources/source.py @@ -0,0 +1,44 @@ +import json +import marshmallow as mm + +class Source(object): + InputConfigSchema = None + OutputConfigSchema = None + + def __init__(self): + pass + + def get_dict(self): + pass + + def put_dict(self,d): + pass + + @staticmethod + def get_config(Schema,d): + schema = Schema() + result,errors = schema.load(d) + if len(errors)>0: + raise mm.ValidationError(json.dumps(errors, indent=2)) + return result + +class FileSource(Source): + + def __init__(self,filepath): + self.filepath = filepath + + def get_dict(self): + with open(self.filepath,'r') as fp: + d = self.read_file(fp) + return d + + def put_dict(self,d): + with open(self.filepath,'w') as fp: + self.write_file(fp,d) + + def read_file(self,fp): + pass + + def write_file(self,fp,d): + pass + diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py new file mode 100644 index 00000000..ed3ef0a4 --- /dev/null +++ b/argschema/sources/yaml_source.py @@ -0,0 +1,10 @@ +import yaml +from .source import FileSource + +class YamlSource(FileSource): + + def read_file(self,fp): + return yaml.load(fp) + + def write_file(self,fp,d): + yaml.dump(d,fp) \ No newline at end of file diff --git a/test/test_sources.py b/test/test_sources.py new file mode 100644 index 00000000..48308794 --- /dev/null +++ b/test/test_sources.py @@ -0,0 +1,19 @@ +import argschema +from argschema.sources.json_source import JsonSource +from 
argschema.sources.yaml_source import YamlSource +from test_argschema_parser import MyParser +import json + +def test_json_source(tmpdir): + file_in = tmpdir.join('test_input_json.json') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + json.dump(input_data,file_in) + mod = MyParser(input_source= JsonSource(str(file_in)), args=[]) + + \ No newline at end of file From 00051f0135a699357ea292aec121428efe4ffb03 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 09:33:59 -0800 Subject: [PATCH 09/86] fixed test so that input_json is valid json --- test/test_first_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_first_test.py b/test/test_first_test.py index 5d58edf2..7ace5ac2 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -17,7 +17,7 @@ def test_bad_path(): def test_simple_example(tmpdir): file_in = tmpdir.join('test_input_json.json') - file_in.write('nonesense') + file_in.write('{}') file_out = tmpdir.join('test_output.json') From 8f06171cf5765991c0f311b037a1ea244b167d99 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:38:08 -0800 Subject: [PATCH 10/86] merging changes with sink options --- argschema/argschema_parser.py | 82 +++++++++++++++++++++++--------- argschema/schemas.py | 9 ++-- argschema/sources/json_source.py | 18 +++---- argschema/sources/source.py | 66 +++++++++++++++++-------- argschema/sources/yaml_source.py | 24 +++++++++- argschema/utils.py | 37 ++++++++------ test/sources/test_classes.py | 11 +++++ test/sources/test_json.py | 28 +++++++++++ test/sources/test_yaml.py | 29 +++++++++++ test/test_first_test.py | 9 +--- test/test_sources.py | 19 -------- 11 files changed, 233 insertions(+), 99 deletions(-) create mode 100644 test/sources/test_classes.py create mode 100644 test/sources/test_json.py create mode 100644 test/sources/test_yaml.py delete mode 100644 test/test_sources.py diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index e8699701..cb083272 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -7,7 +7,9 @@ from . import utils from . 
import fields import marshmallow as mm -from .sources.json_source import JsonSource +from .sources.json_source import JsonSource, JsonSink +from .sources.yaml_source import YamlSource, YamlSink +from .sources.source import NotConfiguredSourceError def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -100,7 +102,7 @@ class ArgSchemaParser(object): default_schema = schemas.ArgSchema default_output_schema = None input_config_map = [ JsonSource ] - output_config_map = [ JsonSource ] + output_config_map = [ JsonSink ] def __init__(self, input_data=None, # dictionary input as option instead of --input_json @@ -108,7 +110,7 @@ def __init__(self, output_schema_type=None, # schema for parsing output_json args=None, input_source = None, - output_source = None, + output_sink = None, logger_name=__name__): if schema_type is None: @@ -121,7 +123,17 @@ def __init__(self, self.logger.debug('input_data is {}'.format(input_data)) # convert schema to argparse object - p = utils.schema_argparser(self.schema) + + #consolidate a list of the input and output source + #command line configuration schemas + io_schemas = [] + for in_cfg in self.input_config_map: + io_schemas.append(in_cfg.ConfigSchema()) + for out_cfg in self.output_config_map: + io_schemas.append(out_cfg.ConfigSchema()) + + #build a command line parser from the input schemas and configurations + p = utils.schema_argparser(self.schema,io_schemas) argsobj = p.parse_args(args) argsdict = utils.args_to_dict(argsobj, self.schema) self.logger.debug('argsdict is {}'.format(argsdict)) @@ -129,23 +141,37 @@ def __init__(self, #if you received an input_source, get the dictionary from there if input_source is not None: input_data = input_source.get_dict() - - #loop over the set of input_configurations to see if the command line arguments include a valid configuration - #for one of them + else: #see if the input_data itself contains an InputSource configuration use that + for InputSource in self.input_config_map: + try: + input_data = get_input(InputSource,input_data) + except NotConfiguredSourceError as e: + pass + + #loop over the set of input_configurations to see if the command line arguments + # include a valid configuration for an input_source for InputSource in self.input_config_map: try: - input_config_d = InputSource.get_config(InputSource.InputConfigSchema,argsdict) - input_source = InputSource(**input_config_d) - input_data = input_source.get_dict() + input_data = get_input(InputSource,argsdict) #if the command line argument dictionary doesn't contain a valid configuration #simply move on to the next one - except mm.ValidationError as e: + except NotConfiguredSourceError as e: pass - + # merge the command line dictionary into the input json args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) + # if the output source was not passed in, see if there is a configuration in the combined args + if output_sink is None: + for OutputSink in self.output_config_map: + try: + output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,args) + output_sink = OutputSink(**output_config_d) + except NotConfiguredSourceError: + pass + # save the output source for later + self.output_sink = output_sink # validate with load! 
result = self.load_schema_with_defaults(self.schema, args) @@ -182,30 +208,36 @@ def get_output_json(self, d): return output_json - def output(self, d, output_path=None, **json_dump_options): + def output(self,d,output_path=None,sink=None,**sink_options): """method for outputing dictionary to the output_json file path after validating it through the output_schema_type Parameters ---------- d:dict - output dictionary to output + output dictionary to output + sink: argschema.sources.source.ArgSink + output_sink to output to (optional default to self.output_source) output_path: str path to save to output file, optional (with default to self.mod['output_json'] location) - **json_dump_options : - will be passed through to json.dump - + **sink_options : + will be passed through to sink.put_dict + + (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) Raises ------ marshmallow.ValidationError If any of the output dictionary doesn't meet the output schema """ - if output_path is None: - output_path = self.args['output_json'] - - output_json = self.get_output_json(d) - with open(output_path, 'w') as fp: - json.dump(output_json, fp, **json_dump_options) + + output_d = self.get_output_json(d) + if output_path is not None: + self.logger.warning('DEPRECATED, pass sink instead') + sink = JsonSink(output_json=output_path) + if sink is not None: + sink.put_dict(output_d) + else: + self.output_sink.put_dict(output_d,**sink_options) def load_schema_with_defaults(self, schema, args): """method for deserializing the arguments dictionary (args) @@ -260,3 +292,7 @@ def initialize_logger(name, log_level): logger = logging.getLogger(name) logger.setLevel(level=level) return logger + +class ArgSchemaYamlParser(ArgSchemaParser): + input_config_map = [YamlSource] + output_config_map = [YamlSink] \ No newline at end of file diff --git a/argschema/schemas.py b/argschema/schemas.py index 2f6a171e..5c17b1ce 100644 --- a/argschema/schemas.py +++ b/argschema/schemas.py @@ -34,11 +34,10 @@ class ArgSchema(DefaultSchema): input_json and output_json files and the log_level """ - input_json = InputFile( - description="file path of input json file") - - output_json = OutputFile( - description="file path to output json file") + # input_json = InputFile( + # description= "file path of input json file") + # output_json = OutputFile( + # description= "file path to output json file") log_level = LogLevel( default='ERROR', description="set the logging level of the module") diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index c778482c..b2754362 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -1,4 +1,4 @@ -from .source import FileSource +from .source import FileSource, FileSink import json import marshmallow as mm import argschema @@ -12,16 +12,18 @@ class JsonOutputConfigSchema(mm.Schema): description = 'filepath to save output_json') class JsonSource(FileSource): - InputConfigSchema = JsonInputConfigSchema - OutputConfigSchema = JsonOutputConfigSchema - def __init__(self,input_json=None, output_json=None): - if input_json is not None: - self.filepath = input_json - if output_json is not None: - self.filepath = output_json + ConfigSchema = JsonInputConfigSchema + def __init__(self,input_json=None): + self.filepath = input_json def read_file(self,fp): return json.load(fp) + +class JsonSink(FileSink): + ConfigSchema = JsonOutputConfigSchema + + def __init__(self,output_json=None): + self.filepath = output_json def 
write_file(self,fp,d): json.dump(d,fp) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 41b28dd3..b9a060de 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,28 +1,53 @@ import json import marshmallow as mm -class Source(object): - InputConfigSchema = None - OutputConfigSchema = None +class ConfigurableSourceError(mm.ValidationError): + """Base Exception class for configurable sources""" + pass - def __init__(self): - pass +class MisconfiguredSourceError(ConfigurableSourceError): + """Exception when a source configuration was present in part but failed + validation""" + pass - def get_dict(self): - pass - - def put_dict(self,d): - pass +class NotConfiguredSourceError(ConfigurableSourceError): + """Exception when the source configuration is simply completely missing""" + pass +class ImproperSourceConfigurationSchemaError(ConfigurableSourceError): + """Exception when the source configuration schema isn't valid""" + pass + +def d_contains_any_fields(schema,d): + for field_name, field in schema.declared_fields.items(): + if field_name in d.keys(): + if d[field_name] is not None: + return True + return False + +class ConfigurableSource(object): + ConfigSchema = None @staticmethod def get_config(Schema,d): schema = Schema() - result,errors = schema.load(d) - if len(errors)>0: - raise mm.ValidationError(json.dumps(errors, indent=2)) - return result + if not d_contains_any_fields(schema,d): + raise NotConfiguredSourceError("This source is not present in \n" + json.dumps(d, indent=2)) + else: + result,errors = schema.load(d) + if len(errors)>0: + raise MisconfiguredSourceError("Source incorrectly configured\n" + json.dumps(errors, indent=2)) + else: + return result + +class ArgSource(ConfigurableSource): + def get_dict(self): + pass -class FileSource(Source): +class ArgSink(ConfigurableSource): + def put_dict(self,d): + pass + +class FileSource(ArgSource): def __init__(self,filepath): self.filepath = filepath @@ -31,14 +56,17 @@ def get_dict(self): with open(self.filepath,'r') as fp: d = self.read_file(fp) return d - - def put_dict(self,d): - with open(self.filepath,'w') as fp: - self.write_file(fp,d) def read_file(self,fp): pass +class FileSink(ArgSink): + def __init__(self,filepath): + self.filepath = filepath + def write_file(self,fp,d): pass + def put_dict(self,d): + with open(self.filepath,'w') as fp: + self.write_file(fp,d) \ No newline at end of file diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index ed3ef0a4..480f4d87 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,10 +1,30 @@ import yaml -from .source import FileSource +from .source import FileSource,FileSink +import argschema +import marshmallow as mm + +class YamlInputConfigSchema(mm.Schema): + input_yaml = argschema.fields.InputFile(required=True, + description = 'filepath to input yaml') + +class YamlOutputConfigSchema(mm.Schema): + output_yaml = argschema.fields.OutputFile(required=True, + description = 'filepath to save output yaml') class YamlSource(FileSource): + ConfigSchema = YamlInputConfigSchema + + def __init__(self,input_yaml=None): + self.filepath = input_yaml def read_file(self,fp): return yaml.load(fp) +class YamlSink(FileSink): + ConfigSchema = YamlOutputConfigSchema + + def __init__(self,output_yaml=None): + self.filepath = output_yaml + def write_file(self,fp,d): - yaml.dump(d,fp) \ No newline at end of file + yaml.dump(d,fp,default_flow_style=False) \ No newline at end of file diff 
--git a/argschema/utils.py b/argschema/utils.py index 5ffe3af1..0b752008 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -345,35 +345,42 @@ def build_schema_arguments(schema, arguments=None, path=None, description=None): return arguments -def schema_argparser(schema): +def schema_argparser(schema, additional_schemas=None): """given a jsonschema, build an argparse.ArgumentParser Parameters ---------- schema : argschema.schemas.ArgSchema schema to build an argparser from - + additional_schemas : list[marshmallow.schema] + list of additional schemas to add to the command line arguments Returns ------- argparse.ArgumentParser - the represents the schema + that represents the schemas """ - # build up a list of argument groups using recursive function - # to traverse the tree, root node gets the description given by doc string - # of the schema - arguments = build_schema_arguments(schema, description=schema.__doc__) - # make the root schema appeear first rather than last - arguments = [arguments[-1]] + arguments[0:-1] + if additional_schemas is not None: + schema_list = [schema] + additional_schemas + else: + schema_list = [schema] parser = argparse.ArgumentParser() - - for arg_group in arguments: - group = parser.add_argument_group( - arg_group['title'], arg_group['description']) - for arg_name, arg in arg_group['args'].items(): - group.add_argument(arg_name, **arg) + for s in schema_list: + # build up a list of argument groups using recursive function + # to traverse the tree, root node gets the description given by doc string + # of the schema + arguments = build_schema_arguments(s, description=schema.__doc__) + + # make the root schema appeear first rather than last + arguments = [arguments[-1]] + arguments[0:-1] + + for arg_group in arguments: + group = parser.add_argument_group( + arg_group['title'], arg_group['description']) + for arg_name, arg in arg_group['args'].items(): + group.add_argument(arg_name, **arg) return parser diff --git a/test/sources/test_classes.py b/test/sources/test_classes.py new file mode 100644 index 00000000..06ac1c83 --- /dev/null +++ b/test/sources/test_classes.py @@ -0,0 +1,11 @@ +import argschema + +class MyNestedSchema(argschema.schemas.DefaultSchema): + one = argschema.fields.Int(required=True,description="nested integer") + two = argschema.fields.Boolean(required=True,description="a nested boolean") + +class MySchema(argschema.ArgSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") + nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") + diff --git a/test/sources/test_json.py b/test/sources/test_json.py new file mode 100644 index 00000000..3055cb20 --- /dev/null +++ b/test/sources/test_json.py @@ -0,0 +1,28 @@ +import argschema +from argschema.sources.json_source import JsonSource +from test_classes import MySchema +import json +import pytest + +class MyParser(argschema.ArgSchemaParser): + default_schema = MySchema + +@pytest.fixture(scope='module') +def test_input_file(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + with open(str(file_in),'w') as fp: + json.dump(input_data,fp) + return str(file_in) + +def test_json_source(test_input_file): + mod = MyParser(input_source= JsonSource(test_input_file), args=[]) + +def test_json_source_command(test_input_file): + mod = MyParser(args = 
['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py new file mode 100644 index 00000000..35cdf4a1 --- /dev/null +++ b/test/sources/test_yaml.py @@ -0,0 +1,29 @@ +import argschema +from argschema.sources.yaml_source import YamlSource +from argschema.argschema_parser import ArgSchemaYamlParser +from test_classes import MySchema +import yaml +import pytest + +class MyParser(ArgSchemaYamlParser): + default_schema = MySchema + +@pytest.fixture(scope='module') +def test_input_file(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + with open(str(file_in),'w') as fp: + yaml.dump(input_data,fp,default_flow_style=False) + return str(file_in) + +def test_yaml_source(test_input_file): + mod = MyParser(input_source= YamlSource(test_input_file), args=[]) + +def test_yaml_source_command(test_input_file): + mod = MyParser(args = ['--input_yaml',test_input_file]) \ No newline at end of file diff --git a/test/test_first_test.py b/test/test_first_test.py index 7ace5ac2..9aa70b91 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -15,15 +15,8 @@ def test_bad_path(): ArgSchemaParser(input_data=example, args=[]) -def test_simple_example(tmpdir): - file_in = tmpdir.join('test_input_json.json') - file_in.write('{}') - - file_out = tmpdir.join('test_output.json') - +def test_simple_example(): example = { - "input_json": str(file_in), - "output_json": str(file_out), "log_level": "CRITICAL"} jm = ArgSchemaParser(input_data=example, args=[]) diff --git a/test/test_sources.py b/test/test_sources.py deleted file mode 100644 index 48308794..00000000 --- a/test/test_sources.py +++ /dev/null @@ -1,19 +0,0 @@ -import argschema -from argschema.sources.json_source import JsonSource -from argschema.sources.yaml_source import YamlSource -from test_argschema_parser import MyParser -import json - -def test_json_source(tmpdir): - file_in = tmpdir.join('test_input_json.json') - input_data = { - 'a':5, - 'nest':{ - 'one':7, - 'two':False - } - } - json.dump(input_data,file_in) - mod = MyParser(input_source= JsonSource(str(file_in)), args=[]) - - \ No newline at end of file From 56223cd72ad9062a598594c0c59d4463011878ca Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 09:46:09 -0800 Subject: [PATCH 11/86] removed unused exception --- argschema/sources/source.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index b9a060de..6cab177f 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -14,10 +14,6 @@ class NotConfiguredSourceError(ConfigurableSourceError): """Exception when the source configuration is simply completely missing""" pass -class ImproperSourceConfigurationSchemaError(ConfigurableSourceError): - """Exception when the source configuration schema isn't valid""" - pass - def d_contains_any_fields(schema,d): for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): From c8a641a10671f35029bd6dc794f326cb810d762e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 09:59:21 -0800 Subject: [PATCH 12/86] added pyyaml as requirement --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8607ed84..72c76fad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ numpy marshmallow +pyyaml From 
a81bcc76acfcf5cc791affd4e9118a5e5ba27257 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 10:02:55 -0800 Subject: [PATCH 13/86] removing unused __init__ --- argschema/sources/source.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 6cab177f..0b86c9b4 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -45,9 +45,6 @@ def put_dict(self,d): class FileSource(ArgSource): - def __init__(self,filepath): - self.filepath = filepath - def get_dict(self): with open(self.filepath,'r') as fp: d = self.read_file(fp) @@ -57,8 +54,6 @@ def read_file(self,fp): pass class FileSink(ArgSink): - def __init__(self,filepath): - self.filepath = filepath def write_file(self,fp,d): pass From 88afa434ce683fdf542ea9716a913472a15218fc Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 14:01:48 -0800 Subject: [PATCH 14/86] modified code to raise exception when the more than one configuration is done, and considated looping code into private method --- argschema/argschema_parser.py | 87 ++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index cb083272..164afbb7 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -11,6 +11,7 @@ from .sources.yaml_source import YamlSource, YamlSink from .sources.source import NotConfiguredSourceError + def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -101,72 +102,64 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - input_config_map = [ JsonSource ] - output_config_map = [ JsonSink ] + input_config_map = [JsonSource] + output_config_map = [JsonSink] def __init__(self, input_data=None, # dictionary input as option instead of --input_json schema_type=None, # schema for parsing arguments output_schema_type=None, # schema for parsing output_json args=None, - input_source = None, - output_sink = None, + input_source=None, + output_sink=None, logger_name=__name__): if schema_type is None: schema_type = self.default_schema if output_schema_type is None: output_schema_type = self.default_output_schema - + self.schema = schema_type() self.logger = self.initialize_logger(logger_name, 'WARNING') self.logger.debug('input_data is {}'.format(input_data)) # convert schema to argparse object - #consolidate a list of the input and output source - #command line configuration schemas + # consolidate a list of the input and output source + # command line configuration schemas io_schemas = [] for in_cfg in self.input_config_map: io_schemas.append(in_cfg.ConfigSchema()) for out_cfg in self.output_config_map: io_schemas.append(out_cfg.ConfigSchema()) - #build a command line parser from the input schemas and configurations - p = utils.schema_argparser(self.schema,io_schemas) + # build a command line parser from the input schemas and configurations + p = utils.schema_argparser(self.schema, io_schemas) argsobj = p.parse_args(args) argsdict = utils.args_to_dict(argsobj, self.schema) self.logger.debug('argsdict is {}'.format(argsdict)) - #if you received an input_source, get the dictionary from there + # if you received an input_source, get the dictionary from there if input_source is not None: input_data = input_source.get_dict() - else: #see if the input_data itself contains an 
InputSource configuration use that - for InputSource in self.input_config_map: - try: - input_data = get_input(InputSource,input_data) - except NotConfiguredSourceError as e: - pass + else: # see if the input_data itself contains an InputSource configuration use that + config_data = self.__get_input_data_from_config(input_data) + input_data = config_data if config_data is not None else input_data - #loop over the set of input_configurations to see if the command line arguments - # include a valid configuration for an input_source - for InputSource in self.input_config_map: - try: - input_data = get_input(InputSource,argsdict) - #if the command line argument dictionary doesn't contain a valid configuration - #simply move on to the next one - except NotConfiguredSourceError as e: - pass + # check whether the command line arguments contain an input configuration and use that + config_data = self.__get_input_data_from_config(argsdict) + input_data = config_data if config_data is not None else input_data # merge the command line dictionary into the input json args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) # if the output source was not passed in, see if there is a configuration in the combined args - if output_sink is None: + if output_sink is None: for OutputSink in self.output_config_map: - try: - output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,args) + try: + output_config_d = OutputSink.get_config( + OutputSink.ConfigSchema, args) output_sink = OutputSink(**output_config_d) except NotConfiguredSourceError: pass @@ -180,6 +173,33 @@ def __init__(self, self.logger = self.initialize_logger( logger_name, self.args.get('log_level')) + def __get_input_data_from_config(self, d): + """private function to check for ArgSource configurations in a dictionary + and return the data if it exists + + Parameters + ---------- + d : dict + dictionary to look for InputSource configuration parameters in + + Returns + ------- + dict or None + dictionary of InputData if it found a valid configuration, None otherwise + """ + input_set = False + input_data = None + for InputSource in self.input_config_map: + try: + input_data = get_input(InputSource, d) + if input_set == True: + raise MultipleConfiguredSourceError( + "more then one InputSource configuration present in {}".format(d)) + input_set = True + except NotConfiguredSourceError as e: + pass + return input_data + def get_output_json(self, d): """method for getting the output_json pushed through validation if validation exists @@ -208,7 +228,7 @@ def get_output_json(self, d): return output_json - def output(self,d,output_path=None,sink=None,**sink_options): + def output(self, d, output_path=None, sink=None, **sink_options): """method for outputing dictionary to the output_json file path after validating it through the output_schema_type @@ -222,14 +242,14 @@ def output(self,d,output_path=None,sink=None,**sink_options): path to save to output file, optional (with default to self.mod['output_json'] location) **sink_options : will be passed through to sink.put_dict - + (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) Raises ------ marshmallow.ValidationError If any of the output dictionary doesn't meet the output schema """ - + output_d = self.get_output_json(d) if output_path is not None: self.logger.warning('DEPRECATED, pass sink instead') @@ -237,7 +257,7 @@ def output(self,d,output_path=None,sink=None,**sink_options): if sink is not None: 
sink.put_dict(output_d) else: - self.output_sink.put_dict(output_d,**sink_options) + self.output_sink.put_dict(output_d, **sink_options) def load_schema_with_defaults(self, schema, args): """method for deserializing the arguments dictionary (args) @@ -293,6 +313,7 @@ def initialize_logger(name, log_level): logger.setLevel(level=level) return logger + class ArgSchemaYamlParser(ArgSchemaParser): input_config_map = [YamlSource] - output_config_map = [YamlSink] \ No newline at end of file + output_config_map = [YamlSink] From 72579ac1aea717a6f676aab21128bc3c17382614 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 14:02:03 -0800 Subject: [PATCH 15/86] added exception to facilitate checking for extra configurations --- argschema/sources/source.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 0b86c9b4..bbdf51c2 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -14,6 +14,10 @@ class NotConfiguredSourceError(ConfigurableSourceError): """Exception when the source configuration is simply completely missing""" pass +class MultipleConfiguredSourceError(ConfigurableSourceError): + """Exception when there is more than one validly configured Source configured""" + pass + def d_contains_any_fields(schema,d): for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): From da3cbc4c337219fa729e9325e8b9ee81a9927781 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 14:02:25 -0800 Subject: [PATCH 16/86] added more testing for yaml output source --- test/sources/test_classes.py | 3 +++ test/sources/test_yaml.py | 23 ++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/test/sources/test_classes.py b/test/sources/test_classes.py index 06ac1c83..a9de6944 100644 --- a/test/sources/test_classes.py +++ b/test/sources/test_classes.py @@ -9,3 +9,6 @@ class MySchema(argschema.ArgSchema): b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") +class MyOutputSchema(argschema.schemas.DefaultSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") \ No newline at end of file diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 35cdf4a1..eb75b064 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -1,12 +1,15 @@ import argschema -from argschema.sources.yaml_source import YamlSource +from argschema.sources.yaml_source import YamlSource, YamlSink from argschema.argschema_parser import ArgSchemaYamlParser -from test_classes import MySchema +from test_classes import MySchema, MyOutputSchema import yaml import pytest + + class MyParser(ArgSchemaYamlParser): default_schema = MySchema + default_output_schema = MyOutputSchema @pytest.fixture(scope='module') def test_input_file(tmpdir_factory): @@ -26,4 +29,18 @@ def test_yaml_source(test_input_file): mod = MyParser(input_source= YamlSource(test_input_file), args=[]) def test_yaml_source_command(test_input_file): - mod = MyParser(args = ['--input_yaml',test_input_file]) \ No newline at end of file + mod = MyParser(args = ['--input_yaml',test_input_file]) + +def test_yaml_sink(test_input_file,tmpdir): + outfile=tmpdir.join('test_out.yml') + output_data = { + 'a':3 + } + mod = MyParser(input_source= 
YamlSource(test_input_file), + output_sink = YamlSink(str(outfile))) + mod.output(output_data) + + with open(str(outfile),'r') as fp: + d=yaml.load(fp) + output_data['b']="my value" + assert (output_data == d) From 4dbd25c5a788cf9031b04cccdde6d3b02a0ce12b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 17:33:29 -0800 Subject: [PATCH 17/86] pep8 --- test/sources/test_yaml.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index eb75b064..0ff47dbb 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -6,41 +6,44 @@ import pytest - class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema + @pytest.fixture(scope='module') def test_input_file(tmpdir_factory): file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') input_data = { - 'a':5, - 'nest':{ - 'one':7, - 'two':False + 'a': 5, + 'nest': { + 'one': 7, + 'two': False } } - with open(str(file_in),'w') as fp: - yaml.dump(input_data,fp,default_flow_style=False) + with open(str(file_in), 'w') as fp: + yaml.dump(input_data, fp, default_flow_style=False) return str(file_in) + def test_yaml_source(test_input_file): - mod = MyParser(input_source= YamlSource(test_input_file), args=[]) + mod = MyParser(input_source=YamlSource(test_input_file), args=[]) + def test_yaml_source_command(test_input_file): - mod = MyParser(args = ['--input_yaml',test_input_file]) + mod = MyParser(args=['--input_yaml', test_input_file]) -def test_yaml_sink(test_input_file,tmpdir): - outfile=tmpdir.join('test_out.yml') + +def test_yaml_sink(test_input_file, tmpdir): + outfile = tmpdir.join('test_out.yml') output_data = { - 'a':3 + 'a': 3 } - mod = MyParser(input_source= YamlSource(test_input_file), - output_sink = YamlSink(str(outfile))) + mod = MyParser(input_source=YamlSource(test_input_file), + output_sink=YamlSink(str(outfile))) mod.output(output_data) - - with open(str(outfile),'r') as fp: - d=yaml.load(fp) - output_data['b']="my value" + + with open(str(outfile), 'r') as fp: + d = yaml.load(fp) + output_data['b'] = "my value" assert (output_data == d) From e66b325d03bde8482486072ba6f3633055ec1fe9 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 17:41:24 -0800 Subject: [PATCH 18/86] fixed import error --- argschema/argschema_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 164afbb7..9e571ced 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -9,7 +9,7 @@ import marshmallow as mm from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink -from .sources.source import NotConfiguredSourceError +from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError def contains_non_default_schemas(schema, schema_list=[]): From a0a0048fa03f957c19a403c44110ea0ce7e901b1 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 15 Dec 2017 17:41:38 -0800 Subject: [PATCH 19/86] added tests for multiple input configurations --- test/sources/test_yaml.py | 59 +++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 0ff47dbb..40aded17 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -1,45 +1,60 @@ import argschema from 
argschema.sources.yaml_source import YamlSource, YamlSink +from argschema.sources.json_source import JsonSource, JsonSink +from argschema.sources.source import MultipleConfiguredSourceError from argschema.argschema_parser import ArgSchemaYamlParser from test_classes import MySchema, MyOutputSchema import yaml import pytest - +import json class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema +class MyDualParser(MyParser): + input_config_map = [JsonSource, YamlSource] + output_config_map = [JsonSink, YamlSink] + +input_data = { + 'a': 5, + 'nest': { + 'one': 7, + 'two': False + } +} @pytest.fixture(scope='module') -def test_input_file(tmpdir_factory): +def test_yaml_input_file(tmpdir_factory): file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') - input_data = { - 'a': 5, - 'nest': { - 'one': 7, - 'two': False - } - } + with open(str(file_in), 'w') as fp: yaml.dump(input_data, fp, default_flow_style=False) return str(file_in) +@pytest.fixture(scope='module') +def test_json_input_file(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') + + with open(str(file_in), 'w') as fp: + json.dump(input_data, fp) + return str(file_in) + -def test_yaml_source(test_input_file): - mod = MyParser(input_source=YamlSource(test_input_file), args=[]) +def test_yaml_source(test_yaml_input_file): + mod = MyParser(input_source=YamlSource(test_yaml_input_file), args=[]) -def test_yaml_source_command(test_input_file): - mod = MyParser(args=['--input_yaml', test_input_file]) +def test_yaml_source_command(test_yaml_input_file): + mod = MyParser(args=['--input_yaml', test_yaml_input_file]) -def test_yaml_sink(test_input_file, tmpdir): +def test_yaml_sink(test_yaml_input_file, tmpdir): outfile = tmpdir.join('test_out.yml') output_data = { 'a': 3 } - mod = MyParser(input_source=YamlSource(test_input_file), + mod = MyParser(input_source=YamlSource(test_yaml_input_file), output_sink=YamlSink(str(outfile))) mod.output(output_data) @@ -47,3 +62,17 @@ def test_yaml_sink(test_input_file, tmpdir): d = yaml.load(fp) output_data['b'] = "my value" assert (output_data == d) + +def test_dual_parser(test_json_input_file,test_yaml_input_file): + + mod = MyDualParser(args=['--input_yaml', test_yaml_input_file]) + assert mod.args['a']==5 + assert mod.args['nest']==input_data['nest'] + + mod = MyDualParser(args=['--input_json', test_json_input_file]) + assert mod.args['a']==5 + assert mod.args['nest']==input_data['nest'] + +def test_dual_parser_fail(test_json_input_file,test_yaml_input_file): + with pytest.raises(MultipleConfiguredSourceError): + mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) \ No newline at end of file From a83a4cc0383000f93299d06cf2e75c8b28a7f7b6 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 09:43:27 -0800 Subject: [PATCH 20/86] made it error if more than one output sink configured --- argschema/argschema_parser.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 9e571ced..e3c72cd3 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -155,16 +155,11 @@ def __init__(self, self.logger.debug('args after merge {}'.format(args)) # if the output source was not passed in, see if there is a configuration in the combined args - if output_sink is None: - for OutputSink in self.output_config_map: - try: - 
output_config_d = OutputSink.get_config( - OutputSink.ConfigSchema, args) - output_sink = OutputSink(**output_config_d) - except NotConfiguredSourceError: - pass + if output_sink is None: + output_sink = self.__get_output_sink_from_config(args) # save the output source for later self.output_sink = output_sink + # validate with load! result = self.load_schema_with_defaults(self.schema, args) @@ -173,7 +168,37 @@ def __init__(self, self.logger = self.initialize_logger( logger_name, self.args.get('log_level')) - def __get_input_data_from_config(self, d): + def __get_output_sink_from_config(self,d): + """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink + + Parameters + ---------- + d : dict + dictionary to look for ArgSink Configuration parameters in + + Returns + ------- + ArgSink + A configured argsink + + Raises + ------ + MultipleConfiguredSourceError + If more than one Sink is configured + """ + output_set = False + output_sink = None + for OutputSink in self.output_config_map: + try: + output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,d) + if output_set: + raise MultipleConfiguredSourceError("more then one OutputSink configuration present in {}".format(d)) + output_sink = OutputSink(**output_config_d) + output_set=True + except NotConfiguredSourceError: + pass + + def __get_input_data_from_config(self,d): """private function to check for ArgSource configurations in a dictionary and return the data if it exists @@ -186,15 +211,19 @@ def __get_input_data_from_config(self, d): ------- dict or None dictionary of InputData if it found a valid configuration, None otherwise + + Raises + ------ + MultipleConfiguredSourceError + if more than one InputSource is configured """ input_set = False input_data = None for InputSource in self.input_config_map: try: input_data = get_input(InputSource, d) - if input_set == True: - raise MultipleConfiguredSourceError( - "more then one InputSource configuration present in {}".format(d)) + if input_set: + raise MultipleConfiguredSourceError("more then one InputSource configuration present in {}".format(d)) input_set = True except NotConfiguredSourceError as e: pass From 3e63c22163ff8e2ac4c9d869ad1e52a37ebbfcfc Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 09:52:56 -0800 Subject: [PATCH 21/86] fixed bug in output_sink method --- argschema/argschema_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index e3c72cd3..2456d9f6 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -159,7 +159,7 @@ def __init__(self, output_sink = self.__get_output_sink_from_config(args) # save the output source for later self.output_sink = output_sink - + # validate with load! 
result = self.load_schema_with_defaults(self.schema, args) @@ -197,7 +197,8 @@ def __get_output_sink_from_config(self,d): output_set=True except NotConfiguredSourceError: pass - + return output_sink + def __get_input_data_from_config(self,d): """private function to check for ArgSource configurations in a dictionary and return the data if it exists From 348812e5331ab12c80b329b96ed103464bea7181 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 09:53:11 -0800 Subject: [PATCH 22/86] added 2 output config failure test --- test/sources/test_yaml.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 40aded17..96d9ed09 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -75,4 +75,13 @@ def test_dual_parser(test_json_input_file,test_yaml_input_file): def test_dual_parser_fail(test_json_input_file,test_yaml_input_file): with pytest.raises(MultipleConfiguredSourceError): - mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) \ No newline at end of file + mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) + +def test_dual_parser_output_fail(test_json_input_file,tmpdir): + test_json_output = str(tmpdir.join('output.yml')) + test_yaml_output = str(tmpdir.join('output.json')) + with pytest.raises(MultipleConfiguredSourceError): + mod = MyDualParser(args=['--input_json', test_json_input_file, + '--output_json',test_json_output, + '--output_yaml',test_yaml_output]) + \ No newline at end of file From 744d16f30a04dbd5256dbd3b57f3a89a1bffd003 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:24:06 -0800 Subject: [PATCH 23/86] renaming default_configurable_sinks,sources --- argschema/argschema_parser.py | 14 +++++++------- test/sources/test_yaml.py | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 2456d9f6..b78dd529 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -102,8 +102,8 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - input_config_map = [JsonSource] - output_config_map = [JsonSink] + default_configurable_sources = [ JsonSource ] + default_configurable_sinks = [ JsonSink ] def __init__(self, input_data=None, # dictionary input as option instead of --input_json @@ -128,9 +128,9 @@ def __init__(self, # consolidate a list of the input and output source # command line configuration schemas io_schemas = [] - for in_cfg in self.input_config_map: + for in_cfg in self.default_configurable_sources: io_schemas.append(in_cfg.ConfigSchema()) - for out_cfg in self.output_config_map: + for out_cfg in self.default_configurable_sinks: io_schemas.append(out_cfg.ConfigSchema()) # build a command line parser from the input schemas and configurations @@ -188,7 +188,7 @@ def __get_output_sink_from_config(self,d): """ output_set = False output_sink = None - for OutputSink in self.output_config_map: + for OutputSink in self.default_configurable_sinks: try: output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,d) if output_set: @@ -220,8 +220,8 @@ def __get_input_data_from_config(self,d): """ input_set = False input_data = None - for InputSource in self.input_config_map: - try: + for InputSource in self.default_configurable_sources: + try: input_data = get_input(InputSource, d) if input_set: 
raise MultipleConfiguredSourceError("more than one InputSource configuration present in {}".format(d)) diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 96d9ed09..9f990ae3 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -11,10 +11,11 @@ class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema + default_configurable_sources = [YamlSource] class MyDualParser(MyParser): - input_config_map = [JsonSource, YamlSource] - output_config_map = [JsonSink, YamlSink] + default_configurable_sources = [JsonSource, YamlSource] + default_configurable_sinks = [JsonSink, YamlSink] input_data = { 'a': 5, From 154e5efef189d83e323e44fca4f509a0b8ecca4b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:26:06 -0800 Subject: [PATCH 24/86] doc typo --- argschema/argschema_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index b78dd529..fa5ce1ca 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -86,7 +86,7 @@ class ArgSchemaParser(object): the schema to use to validate the output_json, used by self.output input_source : argschema.sources.source.Source a generic source of a dictionary - output_source : argschema.sources.source.Source + output_sink : argschema.sources.source.Source a generic sink to put the output dictionary args : list or None command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing From 8dca4a30d8f392b6b5709bffa26989134a19c395 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:27:35 -0800 Subject: [PATCH 25/86] doc changes --- argschema/argschema_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index fa5ce1ca..899f059f 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -154,10 +154,10 @@ def __init__(self, args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) - # if the output source was not passed in, see if there is a configuration in the combined args + # if the output sink was not passed in, see if there is a configuration in the combined args if output_sink is None: output_sink = self.__get_output_sink_from_config(args) - # save the output source for later + # save the output sink for later self.output_sink = output_sink # validate with load!
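
At this point in the series the parser-facing API has settled: a parser declares which sources and sinks it will accept through the default_configurable_sources and default_configurable_sinks class attributes, and configuring more than one of them at once raises MultipleConfiguredSourceError. The following is a minimal sketch of that usage, assembled from the MyDualParser tests above rather than taken from any single patch; MySchema and MyOutputSchema are the test schemas defined in test/sources/test_classes.py, and the .yml/.json paths are placeholders for real parameter files.

from argschema.argschema_parser import ArgSchemaParser
from argschema.sources.json_source import JsonSource, JsonSink
from argschema.sources.yaml_source import YamlSource, YamlSink
from test_classes import MySchema, MyOutputSchema  # test schemas from test/sources/test_classes.py


class MyDualParser(ArgSchemaParser):
    """a parser that accepts its parameters from either a JSON or a YAML configuration"""
    default_schema = MySchema
    default_output_schema = MyOutputSchema
    # class attributes renamed in PATCH 23; each entry contributes its ConfigSchema
    # fields (e.g. input_json, input_yaml) to the command line parser
    default_configurable_sources = [JsonSource, YamlSource]
    default_configurable_sinks = [JsonSink, YamlSink]


# either flag configures an input source, and the schema validates the result the same way
# ('input.yml' and 'input.json' are placeholder paths)
mod = MyDualParser(args=['--input_yaml', 'input.yml'])
mod = MyDualParser(args=['--input_json', 'input.json'])

# per test_dual_parser_fail above, configuring more than one source (or sink) at once
# raises MultipleConfiguredSourceError:
# MyDualParser(args=['--input_yaml', 'input.yml', '--input_json', 'input.json'])
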
From 84827b116342c203c5fa19a0c17c596620dc681d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:40:22 -0800 Subject: [PATCH 26/86] merging changes --- argschema/argschema_parser.py | 39 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 899f059f..b5abfff5 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -102,8 +102,8 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - default_configurable_sources = [ JsonSource ] - default_configurable_sinks = [ JsonSink ] + default_configurable_sources = [JsonSource] + default_configurable_sinks = [JsonSink] def __init__(self, input_data=None, # dictionary input as option instead of --input_json @@ -155,8 +155,8 @@ def __init__(self, self.logger.debug('args after merge {}'.format(args)) # if the output sink was not passed in, see if there is a configuration in the combined args - if output_sink is None: - output_sink = self.__get_output_sink_from_config(args) + if output_sink is None: + output_sink = self.__get_output_sink_from_config(args) # save the output sink for later self.output_sink = output_sink @@ -168,14 +168,14 @@ def __init__(self, self.logger = self.initialize_logger( logger_name, self.args.get('log_level')) - def __get_output_sink_from_config(self,d): + def __get_output_sink_from_config(self, d): """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink Parameters ---------- d : dict dictionary to look for ArgSink Configuration parameters in - + Returns ------- ArgSink @@ -189,17 +189,19 @@ def __get_output_sink_from_config(self,d): output_set = False output_sink = None for OutputSink in self.default_configurable_sinks: - try: - output_config_d = OutputSink.get_config(OutputSink.ConfigSchema,d) - if output_set: - raise MultipleConfiguredSourceError("more then one OutputSink configuration present in {}".format(d)) - output_sink = OutputSink(**output_config_d) - output_set=True - except NotConfiguredSourceError: - pass + try: + output_config_d = OutputSink.get_config( + OutputSink.ConfigSchema, d) + if output_set: + raise MultipleConfiguredSourceError( + "more then one OutputSink configuration present in {}".format(d)) + output_sink = OutputSink(**output_config_d) + output_set = True + except NotConfiguredSourceError: + pass return output_sink - - def __get_input_data_from_config(self,d): + + def __get_input_data_from_config(self, d): """private function to check for ArgSource configurations in a dictionary and return the data if it exists @@ -212,7 +214,7 @@ def __get_input_data_from_config(self,d): ------- dict or None dictionary of InputData if it found a valid configuration, None otherwise - + Raises ------ MultipleConfiguredSourceError @@ -224,7 +226,8 @@ def __get_input_data_from_config(self,d): try: input_data = get_input(InputSource, d) if input_set: - raise MultipleConfiguredSourceError("more then one InputSource configuration present in {}".format(d)) + raise MultipleConfiguredSourceError( + "more then one InputSource configuration present in {}".format(d)) input_set = True except NotConfiguredSourceError as e: pass From 619415e8a16bc76cea403312f0ef2afa93933817 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:40:35 -0800 Subject: [PATCH 27/86] merging changes --- argschema/argschema_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/argschema/argschema_parser.py b/argschema/argschema_parser.py index b5abfff5..e3c34b61 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -11,7 +11,6 @@ from .sources.yaml_source import YamlSource, YamlSink from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError - def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema From 6c133c44dd62ccb37c67a403c81c6c3b26ba9fb1 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:28:45 -0800 Subject: [PATCH 28/86] fix yaml parser --- argschema/argschema_parser.py | 4 ++-- test/sources/test_yaml.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index e3c34b61..68751011 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -347,5 +347,5 @@ def initialize_logger(name, log_level): class ArgSchemaYamlParser(ArgSchemaParser): - input_config_map = [YamlSource] - output_config_map = [YamlSink] + default_configurable_sources = [YamlSource] + default_configurable_sinks = [YamlSink] diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 9f990ae3..585a8d29 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -11,7 +11,6 @@ class MyParser(ArgSchemaYamlParser): default_schema = MySchema default_output_schema = MyOutputSchema - default_configurable_sources = [YamlSource] class MyDualParser(MyParser): default_configurable_sources = [JsonSource, YamlSource] From d520847fc6b52e0edd930cd9e54e2bf92812300f Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 13:29:58 -0800 Subject: [PATCH 29/86] removing pika source --- argschema/sources/pika_source.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 argschema/sources/pika_source.py diff --git a/argschema/sources/pika_source.py b/argschema/sources/pika_source.py deleted file mode 100644 index ee9b22a3..00000000 --- a/argschema/sources/pika_source.py +++ /dev/null @@ -1,29 +0,0 @@ -from .source import InputSource -import pika -import json - -class PikaJsonSource(InputSource): - - def __init__(self,channel,queue): - """Pika client source for dictionary - - Parameters - ---------- - channel: pika.channel.Channel - pika client channel to connect to - queue: str - queue name to get message from - """ - assert(type(channel)==pika.channel.Channel) - self.channel = channel - self.queue = queue - - def get_dict(self): - method_frame, header_frame, body = self.channel.basic_get(self.queue) - if method_frame: - d = json.loads(body) - self.channel.basic_ack(method_frame.delivery_tag) - return d - - def put_dict(self,d): - \ No newline at end of file From 748233bbdd62dd4f51d6e841ee471fc4ae1888a1 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:19:38 -0800 Subject: [PATCH 30/86] made ConfigSource default __init__ and added auto imports to sources sub-package --- argschema/sources/__init__.py | 2 ++ argschema/sources/source.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py index e69de29b..4a732b36 100644 --- a/argschema/sources/__init__.py +++ b/argschema/sources/__init__.py @@ -0,0 +1,2 @@ +from .source import ArgSink, ArgSource +from .json_source import JsonSource, JsonSink diff --git a/argschema/sources/source.py b/argschema/sources/source.py index bbdf51c2..f6e258b9 100644 
--- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -27,6 +27,10 @@ def d_contains_any_fields(schema,d): class ConfigurableSource(object): ConfigSchema = None + def __init__(self,**kwargs): + for key,value in kwargs.items(): + self.__dict__[key]=value + @staticmethod def get_config(Schema,d): schema = Schema() From 9cb0580bfda78a02a6fcd82b2860a2150f4097b0 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:19:48 -0800 Subject: [PATCH 31/86] added a test for a novel UrlSource --- test/sources/test_url.py | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 test/sources/test_url.py diff --git a/test/sources/test_url.py b/test/sources/test_url.py new file mode 100644 index 00000000..e82e4d04 --- /dev/null +++ b/test/sources/test_url.py @@ -0,0 +1,58 @@ +from argschema.sources import ArgSource +from argschema.schemas import DefaultSchema +from argschema.fields import Str,Int +from argschema import ArgSchemaParser +import requests +import mock +from test_classes import MySchema + +class UrlSourceConfig(DefaultSchema): + input_host = Str(required=True, description="host of url") + input_port = Int(required=False, default=80, description="port of url") + input_url = Str(required=True, description="location on host of input") + +class UrlSource(ArgSource): + ConfigSchema = UrlSourceConfig + def get_dict(self): + url = "http://{}:{}/{}".format(self.input_host, + self.input_port, + self.input_url) + response = requests.get(url) + return response.json() + +class UrlArgSchemaParser(ArgSchemaParser): + default_configurable_sources = [UrlSource] + default_schema = MySchema + +# This method will be used by the mock to replace requests.get +def mocked_requests_get(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + if args[0] == 'http://localhost:88/test.json': + return MockResponse({ + 'a':7, + 'nest':{ + 'one':7, + 'two':False + } + }, 200) + + + return MockResponse(None, 404) + +@mock.patch('requests.get', side_effect=mocked_requests_get) +def test_url_parser(mock_get): + input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') + mod = UrlArgSchemaParser(input_source=input_source,args = []) + assert(mod.args['a']==7) + +@mock.patch('requests.get', side_effect=mocked_requests_get) +def test_url_parser_command_line(mock_get): + mod = UrlArgSchemaParser(args = ['--input_host','localhost','--input_port','88','--input_url','test.json']) + assert(mod.args['a']==7) From 1389e9c727b9eea5ab1c59718e780492da5f0123 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:19:59 -0800 Subject: [PATCH 32/86] added requests as a test requirement --- test_requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test_requirements.txt b/test_requirements.txt index 9fe4b377..82527cc6 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -9,3 +9,4 @@ pylint>=1.5.4 flake8>=3.0.4 rstcheck sphinx +requests \ No newline at end of file From 01e460cb6157a1d9e38861945e84618fd0030101 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:27:21 -0800 Subject: [PATCH 33/86] added a novel test for a new source UrlSource to demonstrate plugin functionality of Sources/Sinks, and wrote some documenation --- docs/user/intro.rst | 54 +++++++++++++++++++++++++++++--------- test/sources/test_url.py | 24 +---------------- 
test/sources/url_source.py | 24 +++++++++++++++++ 3 files changed, 67 insertions(+), 35 deletions(-) create mode 100644 test/sources/url_source.py diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 04138f7c..691f38d0 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -1,5 +1,18 @@ User Guide ===================================== +Installation +------------ +install via source code + +:: + + $ python setup.py install + +or pip + +:: + + $ pip install argschema Your First Module ------------------ @@ -181,6 +194,35 @@ example, having an invalid literal) we will see a casting validation error: argschema does not support setting :class:`~marshmallow.fields.Dict` at the command line. +Alternate Sources/Sinks +----------------------- +A json files are just one way that you might decide to store module parameter dictionaries or outputs. +For example, yaml is another perfectly reasonable choice for storing nested key values stores. Argschema by default provides +json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept +to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. + +You can pass an ArgSchemaParser an `~argschema.sources.ArgSource` object which implements a get_dict method, +and `~argschema.ArgSchemaParser` will get its input parameters from that dictionary. + +Similarly you can pass an `~argschema.sources.ArgSink` object which implements a put_dict method, +and `~argschema.ArgSchemaParser.output` will output the dictionary however that ArgSink specifies it should. + +Finally, both `~argschema.sources.ArgSource` and `~argschema.sources.ArgSink` have a property called ConfigSchema, +which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. +For example, the default `~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. +This is how `~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the `~argschema.sources.ArgSource` +or `~argschema.sources.ArgSink`. + +So for example, if you wanted to define a `~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, +and a module which had a command line interface for setting that host port and url you could do so like this. + +.. literalinclude:: ../../test/url_source.py + +so now a UrlArgSchemaParser would expect command line flags of --input_host, --input_port, --input_url, and will look to download the json +from an http location via requests, or an existing ArgSchemaParser module could be simply passed an UrlSource, even though the original module +author didn't explicitly support passing parameters by http location, and the parameters will still be deserialized and validated all the same. + + Sphinx Documentation -------------------- argschema comes with a autodocumentation feature for Sphnix which will help you automatically @@ -196,19 +238,7 @@ To configure sphinx to use this function, you must be using the sphnix autodoc m def setup(app): app.connect('autodoc-process-docstring',process_schemas) -Installation ------------- -install via source code - -:: - - $ python setup.py install - -or pip - -:: - $ pip install argschema .. 
toctree:: diff --git a/test/sources/test_url.py b/test/sources/test_url.py index e82e4d04..60a199de 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -1,28 +1,6 @@ -from argschema.sources import ArgSource -from argschema.schemas import DefaultSchema -from argschema.fields import Str,Int -from argschema import ArgSchemaParser import requests import mock -from test_classes import MySchema - -class UrlSourceConfig(DefaultSchema): - input_host = Str(required=True, description="host of url") - input_port = Int(required=False, default=80, description="port of url") - input_url = Str(required=True, description="location on host of input") - -class UrlSource(ArgSource): - ConfigSchema = UrlSourceConfig - def get_dict(self): - url = "http://{}:{}/{}".format(self.input_host, - self.input_port, - self.input_url) - response = requests.get(url) - return response.json() - -class UrlArgSchemaParser(ArgSchemaParser): - default_configurable_sources = [UrlSource] - default_schema = MySchema +from url_source import UrlArgSchemaParser, UrlSource # This method will be used by the mock to replace requests.get def mocked_requests_get(*args, **kwargs): diff --git a/test/sources/url_source.py b/test/sources/url_source.py new file mode 100644 index 00000000..2071322c --- /dev/null +++ b/test/sources/url_source.py @@ -0,0 +1,24 @@ +from argschema.sources import ArgSource +from argschema.schemas import DefaultSchema +from argschema.fields import Str,Int +from argschema import ArgSchemaParser +from test_classes import MySchema +import requests + +class UrlSourceConfig(DefaultSchema): + input_host = Str(required=True, description="host of url") + input_port = Int(required=False, default=80, description="port of url") + input_url = Str(required=True, description="location on host of input") + +class UrlSource(ArgSource): + ConfigSchema = UrlSourceConfig + def get_dict(self): + url = "http://{}:{}/{}".format(self.input_host, + self.input_port, + self.input_url) + response = requests.get(url) + return response.json() + +class UrlArgSchemaParser(ArgSchemaParser): + default_configurable_sources = [UrlSource] + default_schema = MySchema From 5b91bbeeb6b39762e91909544c9da230d82ef57d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:37:39 -0800 Subject: [PATCH 34/86] fixed doc include --- docs/user/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 691f38d0..a3f84af2 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -216,7 +216,7 @@ or `~argschema.sources.ArgSink`. So for example, if you wanted to define a `~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that host port and url you could do so like this. -.. literalinclude:: ../../test/url_source.py +.. 
literalinclude:: ../../test/sources/url_source.py so now a UrlArgSchemaParser would expect command line flags of --input_host, --input_port, --input_url, and will look to download the json from an http location via requests, or an existing ArgSchemaParser module could be simply passed an UrlSource, even though the original module From 286b07f40f0c63bfddc01ee24b7e4f2df5812709 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:40:59 -0800 Subject: [PATCH 35/86] fixed doc links --- docs/user/intro.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index a3f84af2..9f09d6ce 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -201,19 +201,19 @@ For example, yaml is another perfectly reasonable choice for storing nested key json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. -You can pass an ArgSchemaParser an `~argschema.sources.ArgSource` object which implements a get_dict method, -and `~argschema.ArgSchemaParser` will get its input parameters from that dictionary. +You can pass an ArgSchemaParser an :class:`~argschema.sources.ArgSource` object which implements a get_dict method, +and :class:`~argschema.ArgSchemaParser` will get its input parameters from that dictionary. -Similarly you can pass an `~argschema.sources.ArgSink` object which implements a put_dict method, -and `~argschema.ArgSchemaParser.output` will output the dictionary however that ArgSink specifies it should. +Similarly you can pass an :class:`~argschema.sources.ArgSink` object which implements a put_dict method, +and :method:`~argschema.ArgSchemaParser.output` will output the dictionary however that :class:`~argschema.sources.ArgSink` specifies it should. -Finally, both `~argschema.sources.ArgSource` and `~argschema.sources.ArgSink` have a property called ConfigSchema, +Finally, both :class:`~argschema.sources.ArgSource` and :class:`~argschema.sources.ArgSink` have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default `~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. -This is how `~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the `~argschema.sources.ArgSource` -or `~argschema.sources.ArgSink`. +For example, the default :class:`~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. +This is how :class:`~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`~argschema.sources.ArgSource` +or :class:`~argschema.sources.ArgSink`. -So for example, if you wanted to define a `~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, +So for example, if you wanted to define a :class:`~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that host port and url you could do so like this. .. 
literalinclude:: ../../test/sources/url_source.py From 8af291d914fcb364063b0fd0c84895c3eda08f2d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 16:46:07 -0800 Subject: [PATCH 36/86] change tests to use normal ArgSchemaParser for urlsource --- test/sources/test_url.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index 60a199de..ca7e2d28 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -1,7 +1,7 @@ import requests import mock -from url_source import UrlArgSchemaParser, UrlSource - +from url_source import UrlArgSchemaParser, UrlSource, MySchema +from argschema import ArgSchemaParser # This method will be used by the mock to replace requests.get def mocked_requests_get(*args, **kwargs): class MockResponse: @@ -27,7 +27,7 @@ def json(self): @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser(mock_get): input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') - mod = UrlArgSchemaParser(input_source=input_source,args = []) + mod = ArgSchemaParser(schema_type=MySchema, input_source=input_source,args = []) assert(mod.args['a']==7) @mock.patch('requests.get', side_effect=mocked_requests_get) From 5bf995896c1995ec9732a0f96ef8cc02b4bd6361 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 21:00:08 -0800 Subject: [PATCH 37/86] doc fix --- docs/user/intro.rst | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 9f09d6ce..6c059f18 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -93,19 +93,19 @@ argschema uses marshmallow (http://marshmallow.readthedocs.io/) under the hood to define the parameters schemas. It comes with a basic set of fields that you can use to define your schemas. One powerful feature of Marshmallow is that you can define custom fields that do arbitrary validation. -:class:`~argschema.fields` contains all the built-in marshmallow fields, +:class:`argschema.fields` contains all the built-in marshmallow fields, but also some useful custom ones, -such as :class:`~argschema.fields.InputFile`, -:class:`~argschema.fields.OutputFile`, -:class:`~argschema.fields.InputDir` that validate that the paths exist and have the proper +such as :class:`argschema.fields.InputFile`, +:class:`argschema.fields.OutputFile`, +:class:`argschema.fields.InputDir` that validate that the paths exist and have the proper permissions to allow files to be read or written. -Other fields, such as :class:`~argschema.fields.NumpyArray` will deserialize ordered lists of lists +Other fields, such as :class:`argschema.fields.NumpyArray` will deserialize ordered lists of lists directly into a numpy array of your choosing. -Finally, an important Field to know is :class:`~argschema.fields.Nested`, which allows you to define +Finally, an important Field to know is :class:`argschema.fields.Nested`, which allows you to define heirarchical nested structures. Note, that if you use Nested schemas, your Nested schemas should -subclass :class:`~argschema.schemas.DefaultSchema` in order that they properly fill in default values, +subclass :class:`argschema.schemas.DefaultSchema` in order that they properly fill in default values, as :class:`marshmallow.Schema` does not do that by itself. 
Another common question about :class:`~argschema.fields.Nested` is how you specify that @@ -201,19 +201,19 @@ For example, yaml is another perfectly reasonable choice for storing nested key json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. -You can pass an ArgSchemaParser an :class:`~argschema.sources.ArgSource` object which implements a get_dict method, -and :class:`~argschema.ArgSchemaParser` will get its input parameters from that dictionary. +You can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which implements a get_dict method, +and :class:`argschema.ArgSchemaParser` will get its input parameters from that dictionary. -Similarly you can pass an :class:`~argschema.sources.ArgSink` object which implements a put_dict method, -and :method:`~argschema.ArgSchemaParser.output` will output the dictionary however that :class:`~argschema.sources.ArgSink` specifies it should. +Similarly you can pass an :class:`argschema.sources.ArgSink` object which implements a put_dict method, +and :method:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. -Finally, both :class:`~argschema.sources.ArgSource` and :class:`~argschema.sources.ArgSink` have a property called ConfigSchema, +Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default :class:`~argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. -This is how :class:`~argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`~argschema.sources.ArgSource` -or :class:`~argschema.sources.ArgSink`. +For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. +This is how :class:`argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`argschema.sources.ArgSource` +or :class:`argschema.sources.ArgSink`. -So for example, if you wanted to define a :class:`~argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, +So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that host port and url you could do so like this. .. literalinclude:: ../../test/sources/url_source.py @@ -226,7 +226,7 @@ author didn't explicitly support passing parameters by http location, and the pa Sphinx Documentation -------------------- argschema comes with a autodocumentation feature for Sphnix which will help you automatically -add documentation of your Schemas and ArgSchemaParser classes in your project. This is how the +add documentation of your Schemas and :class:`argschema.ArgSchemaParser` classes in your project. This is how the documentation of the :doc:`../tests/modules` suite included here was generated. 
To configure sphinx to use this function, you must be using the sphnix autodoc module and add the following to your conf.py file From f9c1ebbf173cfb2d14aa03f2af48655f5b68e0ba Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 2 Jan 2018 21:05:33 -0800 Subject: [PATCH 38/86] doc fix --- docs/user/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 6c059f18..9e2b8982 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -205,7 +205,7 @@ You can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object w and :class:`argschema.ArgSchemaParser` will get its input parameters from that dictionary. Similarly you can pass an :class:`argschema.sources.ArgSink` object which implements a put_dict method, -and :method:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. +and :class:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. From 8a9f64d8bd7cd46e47c5199e1641683ef80c77e1 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 08:14:54 -0800 Subject: [PATCH 39/86] added protocol to url_source_demo --- test/sources/url_source.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index 2071322c..a49190d1 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -1,4 +1,4 @@ -from argschema.sources import ArgSource +from argschema.sources import ArgSource, ArgSink from argschema.schemas import DefaultSchema from argschema.fields import Str,Int from argschema import ArgSchemaParser @@ -9,16 +9,19 @@ class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") input_port = Int(required=False, default=80, description="port of url") input_url = Str(required=True, description="location on host of input") + input_protocol = Str(required=False, default='http') class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig def get_dict(self): - url = "http://{}:{}/{}".format(self.input_host, + url = "{}://{}:{}/{}".format(self.input_protocol, + self.input_host, self.input_port, - self.input_url) + self.input_url) response = requests.get(url) return response.json() + class UrlArgSchemaParser(ArgSchemaParser): default_configurable_sources = [UrlSource] default_schema = MySchema From 55536d7c861c6eab2fac27deb20be089ac88b2b2 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:25:36 -0800 Subject: [PATCH 40/86] removed None's from argparse when configuring sources via smart_merge --- argschema/argschema_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 68751011..9ec02958 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -146,7 +146,7 @@ def __init__(self, input_data = config_data if config_data is not None else input_data # check whether the command line arguments contain an input configuration and use that - config_data = self.__get_input_data_from_config(argsdict) + config_data = self.__get_input_data_from_config(utils.smart_merge({},argsdict)) input_data = 
config_data if config_data is not None else input_data # merge the command line dictionary into the input json From 08aefd3b8e6c3b1ef68af9331a323519ac3dd6a5 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:25:57 -0800 Subject: [PATCH 41/86] utilized marshmallow validation in Configurable source init --- argschema/sources/source.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index f6e258b9..f3cb6d1a 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -25,11 +25,27 @@ def d_contains_any_fields(schema,d): return True return False +class ConfigSourceSchema(mm.Schema): + pass + class ConfigurableSource(object): - ConfigSchema = None + ConfigSchema = ConfigSourceSchema def __init__(self,**kwargs): - for key,value in kwargs.items(): - self.__dict__[key]=value + """Configurable source + + Parameters + ---------- + **kwargs: dict + a set of keyword arguments which will be validated by this classes ConfigSchema + which will define the set of fields that are allowed (and their defaults) + """ + schema = self.ConfigSchema() + result,errors = schema.load(kwargs) + if len(errors)>0: + raise MisconfiguredSourceError('invalid keyword arguments passed {}'.format(kwargs)) + self.__dict__=result + for field_name, field in schema.declared_fields.items(): + self.__dict__[field_name]=result[field_name] @staticmethod def get_config(Schema,d): From 2908dca00f53d3aa39e48d29077a424fe213dfcc Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:26:15 -0800 Subject: [PATCH 42/86] added optional parameter to url_source test --- test/sources/url_source.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index a49190d1..cc0cb540 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -9,15 +9,16 @@ class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") input_port = Int(required=False, default=80, description="port of url") input_url = Str(required=True, description="location on host of input") - input_protocol = Str(required=False, default='http') + input_protocol = Str(required=False, default='http', description="url protocol to use") class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig + def get_dict(self): url = "{}://{}:{}/{}".format(self.input_protocol, - self.input_host, - self.input_port, - self.input_url) + self.input_host, + self.input_port, + self.input_url) response = requests.get(url) return response.json() From 7159dcbf904ea5ae9f41acd99d5243e9e3008e73 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:26:39 -0800 Subject: [PATCH 43/86] pep8 --- test/sources/test_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index ca7e2d28..448a6c7d 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -27,7 +27,7 @@ def json(self): @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser(mock_get): input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') - mod = ArgSchemaParser(schema_type=MySchema, input_source=input_source,args = []) + mod = ArgSchemaParser(schema_type=MySchema, input_source=input_source, args = []) assert(mod.args['a']==7) @mock.patch('requests.get', side_effect=mocked_requests_get) From 
a956f79b88ca29677576f1cddf7545658cecc373 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:26:46 -0800 Subject: [PATCH 44/86] documentation update --- docs/user/intro.rst | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 9e2b8982..8a1d648c 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -197,31 +197,41 @@ command line. Alternate Sources/Sinks ----------------------- A json files are just one way that you might decide to store module parameter dictionaries or outputs. -For example, yaml is another perfectly reasonable choice for storing nested key values stores. Argschema by default provides -json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept -to allow ArgSchemaParser to plugin alternative "sources" and "sinks" of parameters. +Argschema by default provides json support because that is what we use most frequently at the Allen Institute, +however we have generalized the concept to allow :class:`argschema.ArgSchemaParser` to plugin alternative +"sources" and "sinks" of parameters. -You can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which implements a get_dict method, -and :class:`argschema.ArgSchemaParser` will get its input parameters from that dictionary. +For example, yaml is another perfectly reasonable choice for storing nested key values stores. +`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now +input_yaml and output_yaml can be specified instead. + +Furthermore, you can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which +implements a get_dict method, and any :class:`argschema.ArgSchemaParser` will get its input parameters +from that dictionary. Importantly, this is true even when the original module author didn't +explicitly support passing parameters from that mechanism, and the parameters will still be +deserialized and validated in a uniform manner. Similarly you can pass an :class:`argschema.sources.ArgSink` object which implements a put_dict method, -and :class:`argschema.ArgSchemaParser.output` will output the dictionary however that :class:`argschema.sources.ArgSink` specifies it should. +and :class:`argschema.ArgSchemaParser.output` will output the dictionary however that +:class:`argschema.sources.ArgSink` specifies it should. -Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` have a property called ConfigSchema, -which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string field of 'input_json'. -This is how :class:`argschema.ArgSchemaParser` is told what keys and values should be read to initialize the :class:`argschema.sources.ArgSource` -or :class:`argschema.sources.ArgSink`. +Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` +have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize +the kwargs to it's init class. -So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, -and a module which had a command line interface for setting that host port and url you could do so like this. 
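A minimal sketch of the workflow this rewritten intro.rst section describes, assuming the url_source.py test module introduced earlier in this series (UrlSource, UrlArgSchemaParser, MySchema are names from that test file, not library API):

    from argschema import ArgSchemaParser
    from url_source import UrlArgSchemaParser, UrlSource, MySchema

    # hand a pre-configured source to a generic parser ...
    source = UrlSource(input_host='localhost', input_port=88, input_url='test.json')
    mod = ArgSchemaParser(schema_type=MySchema, input_source=source, args=[])

    # ... or let the source be configured from command-line flags
    mod = UrlArgSchemaParser(args=['--input_host', 'localhost',
                                   '--input_port', '88',
                                   '--input_url', 'test.json'])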
+For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string +field of 'input_json'. This is how :class:`argschema.ArgSchemaParser` is told what keys and values +should be read to initialize the :class:`argschema.sources.ArgSource` or :class:`argschema.sources.ArgSink`. -.. literalinclude:: ../../test/sources/url_source.py +So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary +from a particular host, port and url, and a module which had a command line interface for setting that +host port and url you could do so like this. -so now a UrlArgSchemaParser would expect command line flags of --input_host, --input_port, --input_url, and will look to download the json -from an http location via requests, or an existing ArgSchemaParser module could be simply passed an UrlSource, even though the original module -author didn't explicitly support passing parameters by http location, and the parameters will still be deserialized and validated all the same. +.. literalinclude:: ../../test/sources/url_source.py +so now a UrlArgSchemaParser would expect command line flags of '--input_host', '--input_port', '--input_url' +(or look for them in input_data) and will look to download the json from that http location via requests +or an existing :class:`argschema.ArgSchemaParser` module could be simply passed a configured UrlSource via input_source. Sphinx Documentation -------------------- From 5658c09feb61a8aca3f0f863d85a6e11485c67af Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 16:43:54 -0800 Subject: [PATCH 45/86] doc changes --- docs/user/intro.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 8a1d648c..79241820 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -236,10 +236,11 @@ or an existing :class:`argschema.ArgSchemaParser` module could be simply passed Sphinx Documentation -------------------- argschema comes with a autodocumentation feature for Sphnix which will help you automatically -add documentation of your Schemas and :class:`argschema.ArgSchemaParser` classes in your project. This is how the -documentation of the :doc:`../tests/modules` suite included here was generated. +add documentation of your Schemas and :class:`argschema.ArgSchemaParser` classes in your project. +This is how the documentation of the :doc:`../tests/modules` suite included here was generated. -To configure sphinx to use this function, you must be using the sphnix autodoc module and add the following to your conf.py file +To configure sphnix to use this function, you must be using the sphnix autodoc module +and add the following to your conf.py file .. 
code-block:: python From 5072bc792c2eb4ef31cd4bd2adfa7cb540058f91 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:37:28 -0800 Subject: [PATCH 46/86] pep8 --- test/sources/test_url.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index 448a6c7d..f90cf9b3 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -1,8 +1,10 @@ -import requests +import requests import mock from url_source import UrlArgSchemaParser, UrlSource, MySchema from argschema import ArgSchemaParser # This method will be used by the mock to replace requests.get + + def mocked_requests_get(*args, **kwargs): class MockResponse: def __init__(self, json_data, status_code): @@ -14,23 +16,26 @@ def json(self): if args[0] == 'http://localhost:88/test.json': return MockResponse({ - 'a':7, - 'nest':{ - 'one':7, - 'two':False - } - }, 200) - - + 'a': 7, + 'nest': { + 'one': 7, + 'two': False + } + }, 200) return MockResponse(None, 404) + @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser(mock_get): - input_source=UrlSource(input_host='localhost',input_port=88,input_url='test.json') - mod = ArgSchemaParser(schema_type=MySchema, input_source=input_source, args = []) - assert(mod.args['a']==7) + input_source = UrlSource(input_host='localhost', + input_port=88, input_url='test.json') + mod = ArgSchemaParser(schema_type=MySchema, + input_source=input_source, args=[]) + assert(mod.args['a'] == 7) + @mock.patch('requests.get', side_effect=mocked_requests_get) def test_url_parser_command_line(mock_get): - mod = UrlArgSchemaParser(args = ['--input_host','localhost','--input_port','88','--input_url','test.json']) - assert(mod.args['a']==7) + mod = UrlArgSchemaParser( + args=['--input_host', 'localhost', '--input_port', '88', '--input_url', 'test.json']) + assert(mod.args['a'] == 7) From 76572ab25976e298f0261e7a468aabaf70112efa Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 09:37:45 -0800 Subject: [PATCH 47/86] changed to use urllib to construct url --- test/sources/url_source.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index cc0cb540..9d8f0778 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -4,10 +4,14 @@ from argschema import ArgSchemaParser from test_classes import MySchema import requests +try: + from urllib.parse import urlunparse +except: + from urllib import urlunparse class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") - input_port = Int(required=False, default=80, description="port of url") + input_port = Int(required=False, default=None, description="port of url") input_url = Str(required=True, description="location on host of input") input_protocol = Str(required=False, default='http', description="url protocol to use") @@ -15,10 +19,11 @@ class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig def get_dict(self): - url = "{}://{}:{}/{}".format(self.input_protocol, - self.input_host, - self.input_port, - self.input_url) + if self.input_port is None: + netloc = self.input_host + else: + netloc = "{}:{}".format(self.input_host,self.input_port) + url = urlunparse((self.input_protocol,netloc,self.input_url,None,None,None)) response = requests.get(url) return response.json() From 0e13b30fb380dd1e1c446df299888fece6b92aab Mon Sep 17 00:00:00 2001 From: Forrest Collman 
Date: Wed, 3 Jan 2018 10:06:07 -0800 Subject: [PATCH 48/86] removed FileSource pattern --- argschema/sources/json_source.py | 24 +++++++++---------- argschema/sources/source.py | 41 ++++++++++++-------------------- argschema/sources/yaml_source.py | 21 +++++++--------- test/sources/test_json.py | 3 ++- test/sources/test_yaml.py | 9 ++++--- 5 files changed, 43 insertions(+), 55 deletions(-) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index b2754362..d87b4ca4 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -1,4 +1,4 @@ -from .source import FileSource, FileSink +from .source import ArgSource, ArgSink import json import marshmallow as mm import argschema @@ -11,19 +11,17 @@ class JsonOutputConfigSchema(mm.Schema): output_json = argschema.fields.OutputFile(required=True, description = 'filepath to save output_json') -class JsonSource(FileSource): +class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema + + def get_dict(self): + with open(self.input_json,'r') as fp: + return json.load(fp) - def __init__(self,input_json=None): - self.filepath = input_json - def read_file(self,fp): - return json.load(fp) - -class JsonSink(FileSink): +class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def __init__(self,output_json=None): - self.filepath = output_json - - def write_file(self,fp,d): - json.dump(d,fp) + def put_dict(self,d): + with open(self.output_json,'w') as fp: + json.dump(d,fp) + diff --git a/argschema/sources/source.py b/argschema/sources/source.py index f3cb6d1a..42e391b9 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -19,6 +19,8 @@ class MultipleConfiguredSourceError(ConfigurableSourceError): pass def d_contains_any_fields(schema,d): + if len(schema.declared_fields)==0: + return True for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): if d[field_name] is not None: @@ -40,12 +42,8 @@ def __init__(self,**kwargs): which will define the set of fields that are allowed (and their defaults) """ schema = self.ConfigSchema() - result,errors = schema.load(kwargs) - if len(errors)>0: - raise MisconfiguredSourceError('invalid keyword arguments passed {}'.format(kwargs)) - self.__dict__=result - for field_name, field in schema.declared_fields.items(): - self.__dict__[field_name]=result[field_name] + result = self.get_config(self.ConfigSchema,kwargs) + self.__dict__.update(result) @staticmethod def get_config(Schema,d): @@ -58,30 +56,21 @@ def get_config(Schema,d): raise MisconfiguredSourceError("Source incorrectly configured\n" + json.dumps(errors, indent=2)) else: return result + class ArgSource(ConfigurableSource): def get_dict(self): pass -class ArgSink(ConfigurableSource): - def put_dict(self,d): - pass - -class FileSource(ArgSource): - - def get_dict(self): - with open(self.filepath,'r') as fp: - d = self.read_file(fp) - return d - - def read_file(self,fp): - pass - -class FileSink(ArgSink): - - def write_file(self,fp,d): - pass +def get_input_from_config(ArgSource, config_d): + if config_d is not None: + input_config_d = ArgSource.get_config(ArgSource.ConfigSchema, config_d) + input_source = ArgSource(**input_config_d) + input_data = input_source.get_dict() + return input_data + else: + raise NotConfiguredSourceError('No dictionary provided') +class ArgSink(ConfigurableSource): def put_dict(self,d): - with open(self.filepath,'w') as fp: - self.write_file(fp,d) \ No newline at end of file + pass \ No newline at end of file diff --git 
a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 480f4d87..1692b9dd 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,5 +1,5 @@ import yaml -from .source import FileSource,FileSink +from .source import ArgSource,ArgSink import argschema import marshmallow as mm @@ -11,20 +11,17 @@ class YamlOutputConfigSchema(mm.Schema): output_yaml = argschema.fields.OutputFile(required=True, description = 'filepath to save output yaml') -class YamlSource(FileSource): +class YamlSource(ArgSource): ConfigSchema = YamlInputConfigSchema - def __init__(self,input_yaml=None): - self.filepath = input_yaml + def get_dict(self): + with open(self.input_yaml,'r') as fp: + return yaml.load(fp) - def read_file(self,fp): - return yaml.load(fp) - -class YamlSink(FileSink): +class YamlSink(ArgSink): ConfigSchema = YamlOutputConfigSchema - def __init__(self,output_yaml=None): - self.filepath = output_yaml + def put_dict(self,d): + with open(self.output_yaml,'w') as fp: + yaml.dump(d,fp,default_flow_style=False) - def write_file(self,fp,d): - yaml.dump(d,fp,default_flow_style=False) \ No newline at end of file diff --git a/test/sources/test_json.py b/test/sources/test_json.py index 3055cb20..1acc001b 100644 --- a/test/sources/test_json.py +++ b/test/sources/test_json.py @@ -22,7 +22,8 @@ def test_input_file(tmpdir_factory): return str(file_in) def test_json_source(test_input_file): - mod = MyParser(input_source= JsonSource(test_input_file), args=[]) + source = JsonSource(input_json=test_input_file) + mod = MyParser(input_source= source, args=[]) def test_json_source_command(test_input_file): mod = MyParser(args = ['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py index 585a8d29..96c0bce2 100644 --- a/test/sources/test_yaml.py +++ b/test/sources/test_yaml.py @@ -42,7 +42,8 @@ def test_json_input_file(tmpdir_factory): def test_yaml_source(test_yaml_input_file): - mod = MyParser(input_source=YamlSource(test_yaml_input_file), args=[]) + source = YamlSource(input_yaml=test_yaml_input_file) + mod = MyParser(input_source=source, args=[]) def test_yaml_source_command(test_yaml_input_file): @@ -54,8 +55,10 @@ def test_yaml_sink(test_yaml_input_file, tmpdir): output_data = { 'a': 3 } - mod = MyParser(input_source=YamlSource(test_yaml_input_file), - output_sink=YamlSink(str(outfile))) + source = YamlSource(input_yaml=test_yaml_input_file) + sink = YamlSink(output_yaml = str(outfile)) + mod = MyParser(input_source=source, + output_sink=sink) mod.output(output_data) with open(str(outfile), 'r') as fp: From 290d0e843b299aefb6100b251becb392400ce916 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:06:22 -0800 Subject: [PATCH 49/86] moved get_input to source module --- argschema/argschema_parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 9ec02958..8d9d8ae8 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -9,7 +9,7 @@ import marshmallow as mm from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink -from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError +from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema 
which was not an instance of DefaultSchema @@ -222,8 +222,8 @@ def __get_input_data_from_config(self, d): input_set = False input_data = None for InputSource in self.default_configurable_sources: - try: - input_data = get_input(InputSource, d) + try: + input_data = get_input_from_config(InputSource, d) if input_set: raise MultipleConfiguredSourceError( "more then one InputSource configuration present in {}".format(d)) From 327c4478b4dc3493730be31de9357eff5dfe9812 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:07:40 -0800 Subject: [PATCH 50/86] fixed python2 import --- test/sources/url_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sources/url_source.py b/test/sources/url_source.py index 9d8f0778..56e67808 100644 --- a/test/sources/url_source.py +++ b/test/sources/url_source.py @@ -7,7 +7,7 @@ try: from urllib.parse import urlunparse except: - from urllib import urlunparse + from urlparse import urlunparse class UrlSourceConfig(DefaultSchema): input_host = Str(required=True, description="host of url") From 9ac98666068a181bccf90bb9bb7f76750789c7a6 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:18:23 -0800 Subject: [PATCH 51/86] added docstrings --- argschema/sources/source.py | 102 ++++++++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 16 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 42e391b9..3671fd70 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,38 +1,61 @@ import json import marshmallow as mm + class ConfigurableSourceError(mm.ValidationError): """Base Exception class for configurable sources""" pass + class MisconfiguredSourceError(ConfigurableSourceError): """Exception when a source configuration was present in part but failed validation""" pass + class NotConfiguredSourceError(ConfigurableSourceError): """Exception when the source configuration is simply completely missing""" pass + class MultipleConfiguredSourceError(ConfigurableSourceError): """Exception when there is more than one validly configured Source configured""" pass -def d_contains_any_fields(schema,d): - if len(schema.declared_fields)==0: + +def d_contains_any_fields(schema, d): + """function to test if a dictionary contains any elements of a schema + + Parameters + ---------- + schema: marshmallow.Schema + a marshmallow schema to test d with + d: dict + the dictionary to test whether it contains any elements of a schema + + Returns + ------- + bool: + True/False whether d contains any elements of a schema. 
If a schema contains no elements, returns True + """ + + if len(schema.declared_fields) == 0: return True for field_name, field in schema.declared_fields.items(): if field_name in d.keys(): if d[field_name] is not None: - return True + return True return False + class ConfigSourceSchema(mm.Schema): pass + class ConfigurableSource(object): ConfigSchema = ConfigSourceSchema - def __init__(self,**kwargs): + + def __init__(self, **kwargs): """Configurable source Parameters @@ -42,27 +65,73 @@ def __init__(self,**kwargs): which will define the set of fields that are allowed (and their defaults) """ schema = self.ConfigSchema() - result = self.get_config(self.ConfigSchema,kwargs) + result = self.get_config(self.ConfigSchema, kwargs) self.__dict__.update(result) - + @staticmethod - def get_config(Schema,d): - schema = Schema() - if not d_contains_any_fields(schema,d): - raise NotConfiguredSourceError("This source is not present in \n" + json.dumps(d, indent=2)) + def get_config(ConfigSchema, d): + """A static method to get the proper validated configuration keyword arguments/dictionary + of a Configurable source from a dictionary + + Parameters + ---------- + ConfigSchema: marshmallow.Schema + a marshmallow schema that defines the configuration schema for this ConfigurableSource + d: dict + a dictionary that might contain a proper configuration of this schema + + Returns + ------- + dict + a dictionary of configuration values that has been properly deserialized and validated by + ConfigSchema + Raises + ------ + NotConfiguredSourceError + if the configation dictionary does not contain a configuration for this source + MisconfiguredSourceError + if the configuration dictionary contains a configuration but it is invalid + """ + schema = ConfigSchema() + if not d_contains_any_fields(schema, d): + raise NotConfiguredSourceError( + "This source is not present in \n" + json.dumps(d, indent=2)) else: - result,errors = schema.load(d) - if len(errors)>0: - raise MisconfiguredSourceError("Source incorrectly configured\n" + json.dumps(errors, indent=2)) + result, errors = schema.load(d) + if len(errors) > 0: + raise MisconfiguredSourceError( + "Source incorrectly configured\n" + json.dumps(errors, indent=2)) else: return result - + class ArgSource(ConfigurableSource): def get_dict(self): pass + def get_input_from_config(ArgSource, config_d): + """function to return the input dictionary from an ArgSource, given a configuration dictionary + + Parameters + ---------- + ArgSource: class(ArgSource) + The ArgSource class subclass that you want to get input from + config_d: a dictionary that might contain a configuration for this source + + Returns + ------- + dict + a dictionary returned by ArgSource.get_dict() after validating configuration + and instantiating an ArgSource instance + + Raises + ------ + NotConfiguredSourceError + if the configation dictionary does not contain a configuration for this source + MisconfiguredSourceError + if the configuration dictionary contains a configuration but it is invalid + """ if config_d is not None: input_config_d = ArgSource.get_config(ArgSource.ConfigSchema, config_d) input_source = ArgSource(**input_config_d) @@ -71,6 +140,7 @@ def get_input_from_config(ArgSource, config_d): else: raise NotConfiguredSourceError('No dictionary provided') + class ArgSink(ConfigurableSource): - def put_dict(self,d): - pass \ No newline at end of file + def put_dict(self, d): + pass From 10859360bc8d5a90ad8545094ee01f1ed62ba812 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 
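The error contract documented in these docstrings can be read off from get_config; a small sketch of the two failure modes, using the JsonSource shipped in this series (the dictionary keys and file path below are invented for illustration):

    from argschema.sources.json_source import JsonSource
    from argschema.sources.source import (NotConfiguredSourceError,
                                          MisconfiguredSourceError)

    # no 'input_json' key at all: the source is simply not configured
    try:
        JsonSource.get_config(JsonSource.ConfigSchema, {'unrelated_key': 1})
    except NotConfiguredSourceError:
        print('JsonSource not configured in this dictionary')

    # 'input_json' present but invalid (file does not exist): misconfigured
    try:
        JsonSource.get_config(JsonSource.ConfigSchema,
                              {'input_json': '/no/such/file.json'})
    except MisconfiguredSourceError:
        print('JsonSource configured but invalid')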
Jan 2018 10:21:42 -0800 Subject: [PATCH 52/86] added doc strings to methods that need to be implemented --- argschema/sources/source.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 3671fd70..cf5c064b 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -107,6 +107,7 @@ def get_config(ConfigSchema, d): class ArgSource(ConfigurableSource): def get_dict(self): + """method that must be implemented to enable an ArgSource to return a dictionary""" pass @@ -143,4 +144,11 @@ def get_input_from_config(ArgSource, config_d): class ArgSink(ConfigurableSource): def put_dict(self, d): + """method that must be implemented to enable an ArgSink to write a dictionary + + Parameters + ---------- + d: dict + the dictionary to write + """ pass From cf0821d75008aa821050a8e5df4ff83db90b7eac Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:22:00 -0800 Subject: [PATCH 53/86] typo --- argschema/sources/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index cf5c064b..d9dd32bc 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -107,7 +107,7 @@ def get_config(ConfigSchema, d): class ArgSource(ConfigurableSource): def get_dict(self): - """method that must be implemented to enable an ArgSource to return a dictionary""" + """method that must be implemented to enable an ArgSource to return a dictionary""" pass From 877b0bdeba7ea1bc998bcd9ca944a04d55403d81 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:27:30 -0800 Subject: [PATCH 54/86] documenation fixes --- argschema/argschema_parser.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 8d9d8ae8..2e266605 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -78,19 +78,20 @@ class ArgSchemaParser(object): Parameters ---------- input_data : dict or None - dictionary parameters to fall back on if all source aren't present + dictionary parameters to fall back on if not source is given or configured via command line schema_type : schemas.ArgSchema the schema to use to validate the parameters output_schema_type : marshmallow.Schema - the schema to use to validate the output_json, used by self.output + the schema to use to validate the output, used by self.output input_source : argschema.sources.source.Source a generic source of a dictionary output_sink : argschema.sources.source.Source - a generic output to put output dictionary + a generic sink to write output dictionary to args : list or None - command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing + command line arguments passed to the module, if None use argparse to parse the command line, + set to [] if you want to bypass command line parsing logger_name : str - name of logger from the logging module you want to instantiate 'argschema' + name of logger from the logging module you want to instantiate default ('argschema') Raises ------- @@ -284,7 +285,7 @@ def output(self, d, output_path=None, sink=None, **sink_options): output_d = self.get_output_json(d) if output_path is not None: - self.logger.warning('DEPRECATED, pass sink instead') + self.logger.warning('DEPRECATED, pass output_sink instead') sink = JsonSink(output_json=output_path) if sink is not 
None: sink.put_dict(output_d) From f488fa1eac0e83f975f0a746ce7a2483ef5f8cb7 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 3 Jan 2018 10:52:50 -0800 Subject: [PATCH 55/86] doc fixes --- docs/user/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 79241820..47b49b39 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -202,7 +202,7 @@ however we have generalized the concept to allow :class:`argschema.ArgSchemaPars "sources" and "sinks" of parameters. For example, yaml is another perfectly reasonable choice for storing nested key values stores. -`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now +:class:`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now input_yaml and output_yaml can be specified instead. Furthermore, you can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which From 04fc7871e228ce936652676183bccce896e0d060 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 15 Jan 2018 08:48:44 -0800 Subject: [PATCH 56/86] doc update --- docs/user/intro.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 47b49b39..87a342e9 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -196,12 +196,12 @@ command line. Alternate Sources/Sinks ----------------------- -A json files are just one way that you might decide to store module parameter dictionaries or outputs. +Json files are just one way that you might decide to serialize module parameters or outputs. Argschema by default provides json support because that is what we use most frequently at the Allen Institute, however we have generalized the concept to allow :class:`argschema.ArgSchemaParser` to plugin alternative -"sources" and "sinks" of parameters. +"sources" and "sinks" of dictionary inputs and outputs. -For example, yaml is another perfectly reasonable choice for storing nested key values stores. +For example, yaml is another reasonable choice for storing nested key-value stores. :class:`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now input_yaml and output_yaml can be specified instead. @@ -219,9 +219,10 @@ Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. -For example, the default :class:`argschema.sources.json_source.JsonSource.ConfigSchema` has one string +For example, the default :class:`argschema.sources.json_source.JsonSource` has one string field of 'input_json'. This is how :class:`argschema.ArgSchemaParser` is told what keys and values -should be read to initialize the :class:`argschema.sources.ArgSource` or :class:`argschema.sources.ArgSink`. +should be read to initialize a :class:`argschema.sources.ArgSource` or + :class:`argschema.sources.ArgSink` instance. So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that @@ -229,9 +230,11 @@ host port and url you could do so like this. .. 
literalinclude:: ../../test/sources/url_source.py -so now a UrlArgSchemaParser would expect command line flags of '--input_host', '--input_port', '--input_url' -(or look for them in input_data) and will look to download the json from that http location via requests -or an existing :class:`argschema.ArgSchemaParser` module could be simply passed a configured UrlSource via input_source. +so now a UrlArgSchemaParser would expect command line flags of '--input_host' and '--input_url', and +optionally '--input_port','--input_protocol' (or look for them in input_data) and will look to download +the json from that http location via requests. In addition, an existing :class:`argschema.ArgSchemaParser` +module could be simply passed a configured UrlSource via input_source, +and it would get its parameters from there. Sphinx Documentation -------------------- From 4f59db4f2423a87d336b9085b184fce52850e4f7 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 18:34:57 -0800 Subject: [PATCH 57/86] wired up options for json sink --- argschema/argschema_parser.py | 4 +-- argschema/sources/json_source.py | 4 +-- argschema/utils.py | 49 +++++++++++++++++++++++++------- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 2e266605..50e93e44 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -136,7 +136,7 @@ def __init__(self, # build a command line parser from the input schemas and configurations p = utils.schema_argparser(self.schema, io_schemas) argsobj = p.parse_args(args) - argsdict = utils.args_to_dict(argsobj, self.schema) + argsdict = utils.args_to_dict(argsobj, [self.schema]+io_schemas) self.logger.debug('argsdict is {}'.format(argsdict)) # if you received an input_source, get the dictionary from there @@ -288,7 +288,7 @@ def output(self, d, output_path=None, sink=None, **sink_options): self.logger.warning('DEPRECATED, pass output_sink instead') sink = JsonSink(output_json=output_path) if sink is not None: - sink.put_dict(output_d) + sink.put_dict(output_d,**sink_options) else: self.output_sink.put_dict(output_d, **sink_options) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index d87b4ca4..c601229e 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -21,7 +21,7 @@ def get_dict(self): class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def put_dict(self,d): + def put_dict(self,d,**json_options): with open(self.output_json,'w') as fp: - json.dump(d,fp) + json.dump(d,fp,**json_options) diff --git a/argschema/utils.py b/argschema/utils.py index 0b752008..2a2e1433 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -82,16 +82,42 @@ def cli_error_dict(arg_path, field_type, index=0): else: return {arg_path[index]: cli_error_dict(arg_path, field_type, index + 1)} +def get_field_def_from_schema(parts,schema): + """function to get a field_definition from a particular key, specified by it's parts list -def args_to_dict(argsobj, schema=None): + Parameters + ---------- + parts : list[str] + the list of keys to get this schema + schema: marshmallow.Schema + the marshmallow schema to look up this key + + Returns + ------- + marshmallow.Field or None + returns the field in the schema if it exists, otherwise returns None + """ + current_schema = schema + for part in parts: + if part not in current_schema.fields.keys(): + return None + else: + if current_schema.only and part not in current_schema.only: 
+ field_def = None + else: + field_def = current_schema.fields[part] + if isinstance(field_def, fields.Nested): + current_schema = field_def.schema + return field_def +def args_to_dict(argsobj, schemas=None): """function to convert namespace returned by argsparse into a nested dictionary Parameters ---------- argsobj : argparse.Namespace Namespace object returned by standard argparse.parse function - schema : marshmallow.Schema - Optional schema which will be used to cast fields via `FIELD_TYPE_MAP` + schemas : list[marshmallow.Schema] + Optional list of schemas which will be used to cast fields via `FIELD_TYPE_MAP` Returns @@ -105,18 +131,19 @@ def args_to_dict(argsobj, schema=None): errors = {} field_def = None for field in argsdict.keys(): - current_schema = schema parts = field.split('.') root = d for i in range(len(parts)): - if current_schema is not None: - if current_schema.only and parts[i] not in current_schema.only: - field_def = None - else: - field_def = current_schema.fields[parts[i]] - if isinstance(field_def, fields.Nested): - current_schema = field_def.schema + if i == (len(parts) - 1): + field_def = None + for schema in schemas: + field_def = get_field_def_from_schema(parts,schema) + if field_def is not None: + break + + #field_def = next(get_field_def(parts,schema) for schema in schemas if field_in_schema(parts,schema)) + value = argsdict.get(field) if value is not None: try: From 16e7c4b7fd621321ec840ac17b66c1e73fc1b2b4 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 18:43:09 -0800 Subject: [PATCH 58/86] fixed docstring --- argschema/argschema_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 50e93e44..bf2ca190 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -273,10 +273,10 @@ def output(self, d, output_path=None, sink=None, **sink_options): output_sink to output to (optional default to self.output_source) output_path: str path to save to output file, optional (with default to self.mod['output_json'] location) + (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) **sink_options : will be passed through to sink.put_dict - - (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) + Raises ------ marshmallow.ValidationError From 93a191f2165523de782bfb9a683be6d539a95bc5 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 19:06:39 -0800 Subject: [PATCH 59/86] wip --- argschema/argschema_parser.py | 9 +++------ argschema/sources/json_source.py | 10 ++++++---- test/test_argschema_parser.py | 4 ++-- test/test_output.py | 4 +++- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index bf2ca190..509eb423 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -261,7 +261,7 @@ def get_output_json(self, d): return output_json - def output(self, d, output_path=None, sink=None, **sink_options): + def output(self,d,sink=None): """method for outputing dictionary to the output_json file path after validating it through the output_schema_type @@ -284,13 +284,10 @@ def output(self, d, output_path=None, sink=None, **sink_options): """ output_d = self.get_output_json(d) - if output_path is not None: - self.logger.warning('DEPRECATED, pass output_sink instead') - sink = JsonSink(output_json=output_path) if sink is not None: - 
sink.put_dict(output_d,**sink_options) + sink.put_dict(output_d) else: - self.output_sink.put_dict(output_d, **sink_options) + self.output_sink.put_dict(output_d) def load_schema_with_defaults(self, schema, args): """method for deserializing the arguments dictionary (args) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index c601229e..1e3bbdbc 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -10,18 +10,20 @@ class JsonInputConfigSchema(mm.Schema): class JsonOutputConfigSchema(mm.Schema): output_json = argschema.fields.OutputFile(required=True, description = 'filepath to save output_json') - + output_json_indent = argschema.fields.Int(required=False, + default = mm.missing, + description = 'whether to indent options or not') class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema def get_dict(self): with open(self.input_json,'r') as fp: - return json.load(fp) + return json.load(fp,) class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def put_dict(self,d,**json_options): + def put_dict(self,d): with open(self.output_json,'w') as fp: - json.dump(d,fp,**json_options) + json.dump(d,fp,indent=self.output_json_indent) diff --git a/test/test_argschema_parser.py b/test/test_argschema_parser.py index 60de0dd7..043f4b14 100644 --- a/test/test_argschema_parser.py +++ b/test/test_argschema_parser.py @@ -88,8 +88,8 @@ def test_parser_output(tmpdir_factory): } } mod = MyParser(input_data=input_data) - - mod.output(mod.args, output_path=str(json_path), indent=2) + json_sink = argschema.sources.JsonSink(output_json=str(json_path),indent=2) + mod.output(mod.args, sink=json_sink) with open(str(json_path), 'r') as jf: obt = json.load(jf) assert(obt['nest']['one'] == mod.args['nest']['one']) diff --git a/test/test_output.py b/test/test_output.py index 0678f3a7..5bc8583b 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -1,6 +1,7 @@ from argschema import ArgSchemaParser from argschema.schemas import DefaultSchema from argschema.fields import Str, Int, NumpyArray +from argschema.sources import JsonSink import json import numpy as np import pytest @@ -96,7 +97,8 @@ def test_alt_output(tmpdir): "b": 5, "M": M } - mod.output(output, str(file_out_2)) + sink = JsonSink(output_json=str(file_out)) + mod.output(output, sink=sink) with open(str(file_out_2), 'r') as fp: actual_output = json.load(fp) assert actual_output == expected_output From 864d6f698925818d8001f7b1f74d77cf5914f31e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 30 Jan 2018 19:18:53 -0800 Subject: [PATCH 60/86] alternative way of handling indent options --- argschema/sources/json_source.py | 7 ++++--- test/test_argschema_parser.py | 2 +- test/test_output.py | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 1e3bbdbc..558ab365 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -11,7 +11,6 @@ class JsonOutputConfigSchema(mm.Schema): output_json = argschema.fields.OutputFile(required=True, description = 'filepath to save output_json') output_json_indent = argschema.fields.Int(required=False, - default = mm.missing, description = 'whether to indent options or not') class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema @@ -22,8 +21,10 @@ def get_dict(self): class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - + def __init__(self,output_json=None,output_json_indent=None): + 
self.output_json = output_json + self.indent = output_json_indent def put_dict(self,d): with open(self.output_json,'w') as fp: - json.dump(d,fp,indent=self.output_json_indent) + json.dump(d,fp,indent=self.indent) diff --git a/test/test_argschema_parser.py b/test/test_argschema_parser.py index 043f4b14..645123d8 100644 --- a/test/test_argschema_parser.py +++ b/test/test_argschema_parser.py @@ -88,7 +88,7 @@ def test_parser_output(tmpdir_factory): } } mod = MyParser(input_data=input_data) - json_sink = argschema.sources.JsonSink(output_json=str(json_path),indent=2) + json_sink = argschema.sources.JsonSink(output_json=str(json_path),output_json_indent=2) mod.output(mod.args, sink=json_sink) with open(str(json_path), 'r') as jf: obt = json.load(jf) diff --git a/test/test_output.py b/test/test_output.py index 5bc8583b..5878ec7a 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -97,9 +97,9 @@ def test_alt_output(tmpdir): "b": 5, "M": M } - sink = JsonSink(output_json=str(file_out)) - mod.output(output, sink=sink) - with open(str(file_out_2), 'r') as fp: + sink = JsonSink(output_json= str(file_out_2)) + mod.output(output,sink=sink) + with open(str(file_out_2),'r') as fp: actual_output = json.load(fp) assert actual_output == expected_output From b18545bdb74d51cc3351f540a74fcfd1f25f4574 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 08:48:54 -0700 Subject: [PATCH 61/86] style changes --- argschema/sources/source.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index d9dd32bc..a66e92d3 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,4 +1,3 @@ -import json import marshmallow as mm @@ -56,7 +55,7 @@ class ConfigurableSource(object): ConfigSchema = ConfigSourceSchema def __init__(self, **kwargs): - """Configurable source + """Configurable source Parameters ---------- @@ -64,7 +63,7 @@ def __init__(self, **kwargs): a set of keyword arguments which will be validated by this classes ConfigSchema which will define the set of fields that are allowed (and their defaults) """ - schema = self.ConfigSchema() + self.schema = self.ConfigSchema() result = self.get_config(self.ConfigSchema, kwargs) self.__dict__.update(result) @@ -95,12 +94,12 @@ def get_config(ConfigSchema, d): schema = ConfigSchema() if not d_contains_any_fields(schema, d): raise NotConfiguredSourceError( - "This source is not present in \n" + json.dumps(d, indent=2)) + "This source is not present in \n {}".format(d)) else: result, errors = schema.load(d) if len(errors) > 0: raise MisconfiguredSourceError( - "Source incorrectly configured\n" + json.dumps(errors, indent=2)) + "Source incorrectly configured\n {}".format(errors)) else: return result @@ -123,7 +122,7 @@ def get_input_from_config(ArgSource, config_d): Returns ------- dict - a dictionary returned by ArgSource.get_dict() after validating configuration + a dictionary returned by ArgSource.get_dict() after validating configuration and instantiating an ArgSource instance Raises @@ -145,7 +144,7 @@ def get_input_from_config(ArgSource, config_d): class ArgSink(ConfigurableSource): def put_dict(self, d): """method that must be implemented to enable an ArgSink to write a dictionary - + Parameters ---------- d: dict From 4a418de044fadabb3a2724cd4029aa25891c5634 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 28 Apr 2018 14:46:15 -0700 Subject: [PATCH 62/86] flake8 --- argschema/argschema_parser.py | 16 ++++++++-------- 
argschema/schemas.py | 2 +- argschema/utils.py | 13 ++++++++----- test/test_argschema_parser.py | 2 +- test/test_output.py | 6 +++--- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 509eb423..89eb1b4e 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -1,16 +1,15 @@ '''Module that contains the base class ArgSchemaParser which should be subclassed when using this library ''' -import json import logging from . import schemas from . import utils -from . import fields import marshmallow as mm from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config + def contains_non_default_schemas(schema, schema_list=[]): """returns True if this schema contains a schema which was not an instance of DefaultSchema @@ -88,7 +87,7 @@ class ArgSchemaParser(object): output_sink : argschema.sources.source.Source a generic sink to write output dictionary to args : list or None - command line arguments passed to the module, if None use argparse to parse the command line, + command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing logger_name : str name of logger from the logging module you want to instantiate default ('argschema') @@ -136,7 +135,7 @@ def __init__(self, # build a command line parser from the input schemas and configurations p = utils.schema_argparser(self.schema, io_schemas) argsobj = p.parse_args(args) - argsdict = utils.args_to_dict(argsobj, [self.schema]+io_schemas) + argsdict = utils.args_to_dict(argsobj, [self.schema] + io_schemas) self.logger.debug('argsdict is {}'.format(argsdict)) # if you received an input_source, get the dictionary from there @@ -147,7 +146,8 @@ def __init__(self, input_data = config_data if config_data is not None else input_data # check whether the command line arguments contain an input configuration and use that - config_data = self.__get_input_data_from_config(utils.smart_merge({},argsdict)) + config_data = self.__get_input_data_from_config( + utils.smart_merge({}, argsdict)) input_data = config_data if config_data is not None else input_data # merge the command line dictionary into the input json @@ -261,14 +261,14 @@ def get_output_json(self, d): return output_json - def output(self,d,sink=None): + def output(self, d, sink=None): """method for outputing dictionary to the output_json file path after validating it through the output_schema_type Parameters ---------- d:dict - output dictionary to output + output dictionary to output sink: argschema.sources.source.ArgSink output_sink to output to (optional default to self.output_source) output_path: str @@ -276,7 +276,7 @@ def output(self,d,sink=None): (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) **sink_options : will be passed through to sink.put_dict - + Raises ------ marshmallow.ValidationError diff --git a/argschema/schemas.py b/argschema/schemas.py index 5c17b1ce..cd27f599 100644 --- a/argschema/schemas.py +++ b/argschema/schemas.py @@ -1,5 +1,5 @@ import marshmallow as mm -from .fields import LogLevel, InputFile, OutputFile +from .fields import LogLevel class DefaultSchema(mm.Schema): diff --git a/argschema/utils.py b/argschema/utils.py index 2a2e1433..ec9aed58 100644 --- a/argschema/utils.py +++ 
b/argschema/utils.py @@ -82,7 +82,8 @@ def cli_error_dict(arg_path, field_type, index=0): else: return {arg_path[index]: cli_error_dict(arg_path, field_type, index + 1)} -def get_field_def_from_schema(parts,schema): + +def get_field_def_from_schema(parts, schema): """function to get a field_definition from a particular key, specified by it's parts list Parameters @@ -91,7 +92,7 @@ def get_field_def_from_schema(parts,schema): the list of keys to get this schema schema: marshmallow.Schema the marshmallow schema to look up this key - + Returns ------- marshmallow.Field or None @@ -109,6 +110,8 @@ def get_field_def_from_schema(parts,schema): if isinstance(field_def, fields.Nested): current_schema = field_def.schema return field_def + + def args_to_dict(argsobj, schemas=None): """function to convert namespace returned by argsparse into a nested dictionary @@ -138,11 +141,11 @@ def args_to_dict(argsobj, schemas=None): if i == (len(parts) - 1): field_def = None for schema in schemas: - field_def = get_field_def_from_schema(parts,schema) + field_def = get_field_def_from_schema(parts, schema) if field_def is not None: break - - #field_def = next(get_field_def(parts,schema) for schema in schemas if field_in_schema(parts,schema)) + + # field_def = next(get_field_def(parts,schema) for schema in schemas if field_in_schema(parts,schema)) value = argsdict.get(field) if value is not None: diff --git a/test/test_argschema_parser.py b/test/test_argschema_parser.py index 645123d8..dd8c56b7 100644 --- a/test/test_argschema_parser.py +++ b/test/test_argschema_parser.py @@ -88,7 +88,7 @@ def test_parser_output(tmpdir_factory): } } mod = MyParser(input_data=input_data) - json_sink = argschema.sources.JsonSink(output_json=str(json_path),output_json_indent=2) + json_sink = argschema.sources.JsonSink(output_json=str(json_path), output_json_indent=2) mod.output(mod.args, sink=json_sink) with open(str(json_path), 'r') as jf: obt = json.load(jf) diff --git a/test/test_output.py b/test/test_output.py index 5878ec7a..4f24de73 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -97,9 +97,9 @@ def test_alt_output(tmpdir): "b": 5, "M": M } - sink = JsonSink(output_json= str(file_out_2)) - mod.output(output,sink=sink) - with open(str(file_out_2),'r') as fp: + sink = JsonSink(output_json=str(file_out_2)) + mod.output(output, sink=sink) + with open(str(file_out_2), 'r') as fp: actual_output = json.load(fp) assert actual_output == expected_output From f4da011e0ed61453f3ce92e6f6a97afe7e9c83ac Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:16:18 -0800 Subject: [PATCH 63/86] added marshmallow 3 compatability --- argschema/sources/source.py | 7 ++++--- argschema/utils.py | 8 +------- test/test_utils.py | 2 +- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index a66e92d3..e1154130 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -96,10 +96,11 @@ def get_config(ConfigSchema, d): raise NotConfiguredSourceError( "This source is not present in \n {}".format(d)) else: - result, errors = schema.load(d) - if len(errors) > 0: + try: + result = schema.load(d, unknown=mm.EXCLUDE) + except mm.ValidationError as e: raise MisconfiguredSourceError( - "Source incorrectly configured\n {}".format(errors)) + "Source incorrectly configured\n {}".format(e)) else: return result diff --git a/argschema/utils.py b/argschema/utils.py index ec9aed58..b753f72f 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ 
-430,13 +430,7 @@ def load(schema, d): deserialized and validated dictionary """ - results = schema.load(d) - if isinstance(results, tuple): - (results, errors) = results - if len(errors) > 0: - raise mm.ValidationError(errors) - - return results + return schema.load(d, unknown=mm.EXCLUDE) def dump(schema, d): diff --git a/test/test_utils.py b/test/test_utils.py index b75b8222..799f3e01 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -117,7 +117,7 @@ def test_schema_argparser_with_baseball(): 'name': 'Roger Clemens', 'number': 21 }, - 'based_occupied': [1, 2, 3], + 'bases_occupied': [1, 2, 3], 'outs': 2, 'strikes': 2, 'balls': 3, From ffaaae62d45d6b75fc3def992e4f576d1d234173 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:16:36 -0800 Subject: [PATCH 64/86] bump marshmallow req --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 72c76fad..26770969 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ numpy -marshmallow +marshmallow==3.0.0rc4 pyyaml From ea82c89399dbde3a506d62bbea36891d26f77bbf Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:25:57 -0800 Subject: [PATCH 65/86] remove OptionList --- argschema/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/__init__.py b/argschema/__init__.py index 28173c45..ec38f3fa 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -1,5 +1,5 @@ '''argschema: flexible definition, validation and setting of parameters''' -from .fields import InputFile, InputDir, OutputFile, OptionList # noQA:F401 +from .fields import InputFile, InputDir, OutputFile # noQA:F401 from .schemas import ArgSchema # noQA:F401 from .argschema_parser import ArgSchemaParser # noQA:F401 From e129152b2f1cfec3cffbaec11a12ffe5206e0fb3 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:42:02 -0800 Subject: [PATCH 66/86] fixing test --- test/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 799f3e01..d47391f0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -132,10 +132,11 @@ def test_schema_argparser_with_baseball(): parser = utils.schema_argparser(schema) help = parser.format_help() help = help.replace('\n', '').replace(' ', '') + print(help) assert( '--strikesSTRIKEShowmanystrikes(0-2)(REQUIRED)(validoptionsare[0,1,2])' in help) assert( - '--bases_occupied[BASES_OCCUPIED[BASES_OCCUPIED...]]whichbasesareoccupied(constrainedlist)(validoptionsare[1,2,3])' in help) + '--bases_occupiedBASES_OCCUPIEDwhichbasesareoccupied(constrainedlist)(validoptionsare[1,2,3])' in help) assert( '--ballsBALLSnumberofballs(0-4)(default=0)(validoptionsare[0,1,2,3])' in help) assert("--pitcher.numberPITCHER.NUMBERplayer'snumber(mustbe>0)(REQUIRED)" in help) From 577457998231e48a26a06c67ed264787e425056c Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:51:41 -0800 Subject: [PATCH 67/86] making alpha version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9c5b51c9..88099c9b 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ test_required = f.read().splitlines() setup(name='argschema', - version='2.0.0', + version='2.0.0a1', description=' a wrapper for setting up modules that can have parameters specified by command line arguments,\ json_files, or dictionary objects. 
Providing a common wrapper for data processing modules.', author='Forrest Collman,David Feng', From 4de84bfcd326832792bee85a4b147341e09ab058 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:51:46 -0800 Subject: [PATCH 68/86] flake8 --- argschema/sources/json_source.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 558ab365..1dec3ee5 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -3,28 +3,34 @@ import marshmallow as mm import argschema + class JsonInputConfigSchema(mm.Schema): input_json = argschema.fields.InputFile(required=True, - description = 'filepath to input_json') + description='filepath to input_json') + class JsonOutputConfigSchema(mm.Schema): output_json = argschema.fields.OutputFile(required=True, - description = 'filepath to save output_json') + description='filepath to save output_json') output_json_indent = argschema.fields.Int(required=False, - description = 'whether to indent options or not') + description='whether to indent options or not') + + class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema def get_dict(self): - with open(self.input_json,'r') as fp: + with open(self.input_json, 'r') as fp: return json.load(fp,) + class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def __init__(self,output_json=None,output_json_indent=None): + + def __init__(self, output_json=None, output_json_indent=None): self.output_json = output_json self.indent = output_json_indent - def put_dict(self,d): - with open(self.output_json,'w') as fp: - json.dump(d,fp,indent=self.indent) + def put_dict(self, d): + with open(self.output_json, 'w') as fp: + json.dump(d, fp, indent=self.indent) From 58eef15b27230e3a6352b980a2db9262b1654eb6 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:52:59 -0800 Subject: [PATCH 69/86] flake8 --- argschema/sources/json_source.py | 2 +- argschema/sources/yaml_source.py | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 1dec3ee5..61ef02d0 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -18,7 +18,7 @@ class JsonOutputConfigSchema(mm.Schema): class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema - + def get_dict(self): with open(self.input_json, 'r') as fp: return json.load(fp,) diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 1692b9dd..571dd0cd 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,27 +1,30 @@ import yaml -from .source import ArgSource,ArgSink +from .source import ArgSource, ArgSink import argschema import marshmallow as mm + class YamlInputConfigSchema(mm.Schema): - input_yaml = argschema.fields.InputFile(required=True, - description = 'filepath to input yaml') + input_yaml = argschema.fields.InputFile(required=True, + description='filepath to input yaml') + class YamlOutputConfigSchema(mm.Schema): - output_yaml = argschema.fields.OutputFile(required=True, - description = 'filepath to save output yaml') + output_yaml = argschema.fields.OutputFile(required=True, + description='filepath to save output yaml') + class YamlSource(ArgSource): ConfigSchema = YamlInputConfigSchema - + def get_dict(self): - with open(self.input_yaml,'r') as fp: + with open(self.input_yaml, 'r') as fp: return 
yaml.load(fp) + class YamlSink(ArgSink): ConfigSchema = YamlOutputConfigSchema - def put_dict(self,d): - with open(self.output_yaml,'w') as fp: - yaml.dump(d,fp,default_flow_style=False) - + def put_dict(self, d): + with open(self.output_yaml, 'w') as fp: + yaml.dump(d, fp, default_flow_style=False) From 1297faa743f5eeecd6f936b8eefb55317b8c2d3d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:54:03 -0800 Subject: [PATCH 70/86] flake8 --- argschema/fields/files.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index fd30a406..cdc3d98b 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -118,12 +118,6 @@ def _validate(self, value): validate_outpath(value) -def validate_input_path(value): - if not os.path.isfile(value): - raise mm.ValidationError("%s is not a file" % value) - elif not os.access(value, os.R_OK): - raise mm.ValidationError("%s is not readable" % value) - def validate_input_path(value): if not os.path.isfile(value): raise mm.ValidationError("%s is not a file" % value) From 9963abc67b0f42f95fe63076e3a219eff68f35f5 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 07:55:59 -0800 Subject: [PATCH 71/86] flake8 --- argschema/fields/files.py | 2 +- argschema/fields/numpyarrays.py | 4 ++-- argschema/fields/slice.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/argschema/fields/files.py b/argschema/fields/files.py index cdc3d98b..3b1c6b78 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -63,7 +63,7 @@ def _validate(self, value): path = os.path.dirname(value) except Exception as e: # pragma: no cover raise mm.ValidationError( - "%s cannot be os.path.dirname-ed" % value) # pragma: no cover + "{} cannot be os.path.dirname-ed: {}".format(value, e)) # pragma: no cover validate_outpath(path) diff --git a/argschema/fields/numpyarrays.py b/argschema/fields/numpyarrays.py index b4382b4c..b8c84645 100644 --- a/argschema/fields/numpyarrays.py +++ b/argschema/fields/numpyarrays.py @@ -27,8 +27,8 @@ def _deserialize(self, value, attr, obj): return np.array(value, dtype=self.dtype) except ValueError as e: raise mm.ValidationError( - 'Cannot create numpy array with type {} from data.'.format( - self.dtype)) + 'Cannot create numpy array with type {} from data: {}.'.format( + self.dtype, e)) def _serialize(self, value, attr, obj): if value is None: diff --git a/argschema/fields/slice.py b/argschema/fields/slice.py index 8daece63..21bd636c 100644 --- a/argschema/fields/slice.py +++ b/argschema/fields/slice.py @@ -17,7 +17,7 @@ def __init__(self, **kwargs): kwargs['metadata'] = kwargs.get( 'metadata', {'description': 'slice the dataset'}) kwargs['default'] = kwargs.get('default', slice(None)) - super(Slice, self).__init__( **kwargs) + super(Slice, self).__init__(**kwargs) def _deserialize(self, value, attr, obj): try: From 537b03d850e4a9f1ac28d6ab4e97b789a0c4409a Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 08:00:28 -0800 Subject: [PATCH 72/86] python3 inpsect warning fix --- argschema/autodoc.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/argschema/autodoc.py b/argschema/autodoc.py index a04e6e44..d2414827 100644 --- a/argschema/autodoc.py +++ b/argschema/autodoc.py @@ -3,7 +3,10 @@ from argschema.utils import get_description_from_field from argschema.argschema_parser import ArgSchemaParser import inspect - +try: + from inspect import getfullargspec +except ImportError: + 
from inspect import getargspec as getfullargspec FIELD_TYPE_MAP = {v: k for k, v in mm.Schema.TYPE_MAPPING.items()} @@ -26,12 +29,12 @@ def setup(app): # pick out the ArgSchemaParser objects for documenting if issubclass(obj, ArgSchemaParser): # inspect the objects init function to find default schema - (args, vargs, varkw, defaults) = inspect.getargspec(obj.__init__) + argspec = getfullargspec(obj.__init__) # find where the schema_type is as a keyword argument schema_index = next(i for i, arg in enumerate( - args) if arg == 'schema_type') + argspec.args) if arg == 'schema_type') # use its default value to construct the string version of the classpath to the module - def_schema = defaults[schema_index - 1] + def_schema = argspec.defaults[schema_index - 1] def_schema = def_schema or obj.default_schema if def_schema is not None: From 2054abc250b3c4a5611223c76e57c347679ff3eb Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 09:00:05 -0800 Subject: [PATCH 73/86] removing unused recursive checking --- argschema/argschema_parser.py | 61 ----------------------------------- 1 file changed, 61 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 89eb1b4e..ac73deea 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -10,62 +10,6 @@ from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config -def contains_non_default_schemas(schema, schema_list=[]): - """returns True if this schema contains a schema which was not an instance of DefaultSchema - - Parameters - ---------- - schema : marshmallow.Schema - schema to check - schema_list : - (Default value = []) - - Returns - ------- - bool - does this schema only contain schemas which are subclassed from schemas.DefaultSchema - - """ - if not isinstance(schema, schemas.DefaultSchema): - return True - for k, v in schema.declared_fields.items(): - if isinstance(v, mm.fields.Nested): - if type(v.schema) in schema_list: - return False - else: - schema_list.append(type(v.schema)) - if contains_non_default_schemas(v.schema, schema_list): - return True - return False - - -def is_recursive_schema(schema, schema_list=[]): - """returns true if this schema contains recursive elements - - Parameters - ---------- - schema : marshmallow.Schema - schema to check - schema_list : - (Default value = []) - - Returns - ------- - bool - does this schema contain any recursively defined schemas - - """ - for k, v in schema.declared_fields.items(): - if isinstance(v, mm.fields.Nested): - if type(v.schema) in schema_list: - return True - else: - schema_list.append(type(v.schema)) - if is_recursive_schema(v.schema, schema_list): - return True - return False - - class ArgSchemaParser(object): """The main class you should sub-class to write your own argschema module. 
Takes input_data, reference to a input_json and the command line inputs and parses out the parameters @@ -271,11 +215,6 @@ def output(self, d, sink=None): output dictionary to output sink: argschema.sources.source.ArgSink output_sink to output to (optional default to self.output_source) - output_path: str - path to save to output file, optional (with default to self.mod['output_json'] location) - (DEPRECATED path to save to output file, optional (with default to self.mod['output_json'] location) - **sink_options : - will be passed through to sink.put_dict Raises ------ From a5764a001f4b163bc2f68660bdbae40a2831c17e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 10:58:36 -0800 Subject: [PATCH 74/86] bumping doc reqs --- doc_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc_requirements.txt b/doc_requirements.txt index c18297e7..ed358636 100644 --- a/doc_requirements.txt +++ b/doc_requirements.txt @@ -2,6 +2,6 @@ sphinxcontrib-napoleon sphinxcontrib-programoutput sphinxcontrib-inlinesyntaxhighlight numpy -marshmallow +marshmallow==2.0.0rc4 pytest rstcheck From 054aea160bd45a6b0b31760cacf59e427315ead7 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 22 Feb 2019 11:00:41 -0800 Subject: [PATCH 75/86] doc req fix --- doc_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc_requirements.txt b/doc_requirements.txt index ed358636..4c256e74 100644 --- a/doc_requirements.txt +++ b/doc_requirements.txt @@ -2,6 +2,6 @@ sphinxcontrib-napoleon sphinxcontrib-programoutput sphinxcontrib-inlinesyntaxhighlight numpy -marshmallow==2.0.0rc4 +marshmallow==3.0.0rc4 pytest rstcheck From 5053f695ed2e95dc58c7cc8fbc8d6a880e18f735 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 4 Feb 2020 17:29:29 -0800 Subject: [PATCH 76/86] merging master --- .appveyor.yml | 2 - .circleci/config.yml | 50 +------------------- .gitignore | 4 +- README.md | 8 ++++ argschema/__init__.py | 1 + argschema/argschema_parser.py | 4 +- argschema/autodoc.py | 2 +- argschema/fields/files.py | 58 ++++++++++++++++++++--- argschema/fields/numpyarrays.py | 4 +- argschema/fields/slice.py | 2 +- argschema/schemas.py | 2 +- argschema/utils.py | 11 ++--- doc_requirements.txt | 2 +- requirements.txt | 2 +- setup.py | 7 ++- test/fields/test_files.py | 83 +++++++++++++++++++++++++++------ test/test_argschema_parser.py | 2 +- test/test_cli_overrides.py | 29 ++++++++---- test/test_first_test.py | 19 ++++++++ test/test_utils.py | 16 +++---- test/test_validate.py | 4 +- test_requirements.txt | 5 +- 22 files changed, 207 insertions(+), 110 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index c1f59f92..e0eb8ec8 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -2,8 +2,6 @@ build: false environment: matrix: - - MINICONDA: "C:\\Miniconda-x64" - PYTHON: 2.7 - MINICONDA: "C:\\Miniconda36-x64" PYTHON: 3.6 diff --git a/.circleci/config.yml b/.circleci/config.yml index 14e5d8d4..3a8d6aeb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,18 +7,15 @@ workflows: version: 2 build: jobs: - - test-python-2.7 - test-python-3.6.1 - merge: requires: - - test-python-2.7 - test-python-3.6.1 filters: branches: only: dev - package: requires: - - test-python-2.7 - test-python-3.6.1 - merge filters: @@ -73,8 +70,6 @@ jobs: pip install wheel .circleci/circle_dist.sh - - test-python-3.6.1: working_directory: ~/repo docker: @@ -94,7 +89,8 @@ jobs: python3 -m venv venv . 
venv/bin/activate
             pip install -r requirements.txt
-            pip install -r test_requirements.txt
+            sed '/pywin32/d' test_requirements.txt > test_requirements_nowin.txt
+            pip install -r test_requirements_nowin.txt
 
       - save_cache:
           paths:
@@ -115,45 +111,3 @@ jobs:
       - store_artifacts:
           path: test-reports
           destination: test-reports
-
-
-  test-python-2.7:
-    working_directory: ~/repo
-    docker:
-      - image: circleci/python:2.7
-    steps:
-      - checkout
-      # Download and cache dependencies
-      - restore_cache:
-          keys:
-          - python27-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "test_requirements.txt"}}
-          # fallback to using the latest cache if no exact match is found
-          - python27-dependencies-
-
-      - run:
-          name: install dependencies
-          command: |
-            virtualenv venv
-            . venv/bin/activate
-            pip install -r requirements.txt
-            pip install -r test_requirements.txt
-
-      - save_cache:
-          paths:
-            - ./venv
-          key: python27-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "test_requirements.txt"}}
-
-      # run tests!
-      - run:
-          name: run tests
-          command: |
-            . venv/bin/activate
-            python setup.py test
-            bash <(curl -s https://codecov.io/bash)
-
-      - store_test_results:
-          path: test-reports
-
-      - store_artifacts:
-          path: test-reports
-          destination: test-reports
diff --git a/.gitignore b/.gitignore
index 978b4e80..3de49e52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,4 +105,6 @@ ENV/
 test-reports/
 *DS_Store
 argschema/bin
-examples/output*.json
\ No newline at end of file
+examples/output*.json
+
+.vscode/
\ No newline at end of file
diff --git a/README.md b/README.md
index 84c162f6..2e3e3e60 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,14 @@ OR pass a json_dictionary directly into the module with the parameters defined
 AND/OR pass parameters via the command line, in a way that will override the
 input_json or the json_dictionary given.
 
+## Upgrading to version 2.0
+The major change in argschema 2.0 is becoming
+compatible with marshmallow 3, which changes
+many of the ways your schemas and schema modifications work. Some notable differences are that schemas are strict now by default, so tossing keys in your outputs or inputs that were ignored and stripped before now throw errors unless
+unknown fields are explicitly excluded.
+Please read this document for more guidance
+https://marshmallow.readthedocs.io/en/stable/upgrading.html
+
 ## Level of Support
 We are planning on occasional updating this tool with no fixed schedule. Community involvement is encouraged through both issues and pull requests. Please make pull requests against the dev branch, as we will test changes there before merging into master.
 
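A minimal sketch (not part of the patch series) of the marshmallow 3 strictness that the README note above describes; ExampleSchema and its field are hypothetical, but the unknown=mm.EXCLUDE option mirrors the change made to argschema.utils.load earlier in this series:

    import marshmallow as mm

    class ExampleSchema(mm.Schema):
        # hypothetical schema, used only to illustrate the marshmallow 3 behavior change
        a = mm.fields.Int(required=True)

    # marshmallow 3 is strict by default: an unknown key now raises a ValidationError
    try:
        ExampleSchema().load({"a": 1, "stray_key": 2})
    except mm.ValidationError as err:
        print(err.messages)  # {'stray_key': ['Unknown field.']}

    # ...unless unknown fields are explicitly excluded, as utils.load now does
    print(ExampleSchema().load({"a": 1, "stray_key": 2}, unknown=mm.EXCLUDE))  # {'a': 1}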
diff --git a/argschema/__init__.py b/argschema/__init__.py index ec38f3fa..1148e215 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -3,6 +3,7 @@ from .schemas import ArgSchema # noQA:F401 from .argschema_parser import ArgSchemaParser # noQA:F401 +__version__ = "2.0.1" def main(): # pragma: no cover jm = ArgSchemaParser() diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index ac73deea..d446d663 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -197,6 +197,9 @@ def get_output_json(self, d): """ if self.output_schema_type is not None: schema = self.output_schema_type() + errors = schema.validate(d) + if len(errors)>0: + raise(mm.ValidationError(errors)) output_json = utils.dump(schema, d) else: self.logger.warning("output_schema_type is not defined,\ @@ -239,7 +242,6 @@ def load_schema_with_defaults(self, schema, args): a dictionary of input arguments schema : - Returns ------- dict diff --git a/argschema/autodoc.py b/argschema/autodoc.py index d2414827..a47a19ce 100644 --- a/argschema/autodoc.py +++ b/argschema/autodoc.py @@ -120,7 +120,7 @@ def setup(app): raw_type = '?' field_line += ":class:`~{}.{}`,{}".format( field_type.__module__, field_type.__name__, raw_type) - except: + except Exception as e: # in case this fails for some reason, note it as unknown # TODO handle this more elegantly, identify and patch up such cases field_line += "unknown,unknown" diff --git a/argschema/fields/files.py b/argschema/fields/files.py index 3b1c6b78..d9917638 100644 --- a/argschema/fields/files.py +++ b/argschema/fields/files.py @@ -3,11 +3,35 @@ import marshmallow as mm import tempfile import errno +import sys +import uuid +import stat +import warnings + + +class WindowsNamedTemporaryFile(): + def __init__(self, dir=None, mode=None): + self.filename = os.path.join(dir, str(uuid.uuid4())) + self.mode = mode + + def __enter__(self): + self.open_file = open(self.filename, self.mode) + return self.open_file + + def __exit__(self, *args): + self.open_file.close() + os.remove(self.filename) + + +if sys.platform == "win32": + NamedTemporaryFile = WindowsNamedTemporaryFile +else: + NamedTemporaryFile = tempfile.NamedTemporaryFile def validate_outpath(path): try: - with tempfile.NamedTemporaryFile(mode='w', dir=path) as tfile: + with NamedTemporaryFile(mode='w', dir=path) as tfile: tfile.write('0') tfile.close() @@ -66,6 +90,8 @@ def _validate(self, value): "{} cannot be os.path.dirname-ed: {}".format(value, e)) # pragma: no cover validate_outpath(path) +class OutputDirModeException(Exception): + pass class OutputDir(mm.fields.Str): """OutputDir is a :class:`marshmallow.fields.Str` subclass which is a path to @@ -86,6 +112,9 @@ class OutputDir(mm.fields.Str): def __init__(self, mode=None, *args, **kwargs): self.mode = mode + if (self.mode is not None) & (sys.platform == "win32"): + raise OutputDirModeException( + "Setting mode of OutputDir supported only on posix systems") super(OutputDir, self).__init__(*args, **kwargs) def _validate(self, value): @@ -121,8 +150,17 @@ def _validate(self, value): def validate_input_path(value): if not os.path.isfile(value): raise mm.ValidationError("%s is not a file" % value) - elif not os.access(value, os.R_OK): - raise mm.ValidationError("%s is not readable" % value) + else: + if sys.platform == "win32": + try: + with open(value) as f: + s = f.read() + except IOError as x: + if x.errno == errno.EACCES: + raise mm.ValidationError("%s is not readable" % value) + else: + if not os.access(value, os.R_OK): + 
raise mm.ValidationError("%s is not readable" % value) class InputDir(mm.fields.Str): @@ -134,9 +172,17 @@ class InputDir(mm.fields.Str): def _validate(self, value): if not os.path.isdir(value): raise mm.ValidationError("%s is not a directory") - elif not os.access(value, os.R_OK): - raise mm.ValidationError( - "%s is not a readable directory" % value) + + if sys.platform == "win32": + try: + x = list(os.scandir(value)) + except PermissionError: + raise mm.ValidationError( + "%s is not a readable directory" % value) + else: + if not os.access(value, os.R_OK): + raise mm.ValidationError( + "%s is not a readable directory" % value) class InputFile(mm.fields.Str): diff --git a/argschema/fields/numpyarrays.py b/argschema/fields/numpyarrays.py index b8c84645..98e38cc2 100644 --- a/argschema/fields/numpyarrays.py +++ b/argschema/fields/numpyarrays.py @@ -22,7 +22,7 @@ def __init__(self, dtype=None, *args, **kwargs): self.dtype = dtype super(NumpyArray, self).__init__(mm.fields.Field, *args, **kwargs) - def _deserialize(self, value, attr, obj): + def _deserialize(self, value, attr, obj, **kwargs): try: return np.array(value, dtype=self.dtype) except ValueError as e: @@ -30,7 +30,7 @@ def _deserialize(self, value, attr, obj): 'Cannot create numpy array with type {} from data: {}.'.format( self.dtype, e)) - def _serialize(self, value, attr, obj): + def _serialize(self, value, attr, obj, **kwargs): if value is None: return None return mm.fields.List._serialize(self, value.tolist(), attr, obj) diff --git a/argschema/fields/slice.py b/argschema/fields/slice.py index 21bd636c..232622cc 100644 --- a/argschema/fields/slice.py +++ b/argschema/fields/slice.py @@ -19,7 +19,7 @@ def __init__(self, **kwargs): kwargs['default'] = kwargs.get('default', slice(None)) super(Slice, self).__init__(**kwargs) - def _deserialize(self, value, attr, obj): + def _deserialize(self, value, attr, obj, **kwargs): try: args = tuple([int(c) if c else None for c in value.split(':')]) return slice(*args) diff --git a/argschema/schemas.py b/argschema/schemas.py index cd27f599..9ad7831f 100644 --- a/argschema/schemas.py +++ b/argschema/schemas.py @@ -8,7 +8,7 @@ class DefaultSchema(mm.Schema): """ @mm.pre_load - def make_object(self, in_data): + def make_object(self, in_data, **kwargs): """marshmallow.pre_load decorated function for applying defaults on deserialation Parameters diff --git a/argschema/utils.py b/argschema/utils.py index b753f72f..dc8d58c9 100644 --- a/argschema/utils.py +++ b/argschema/utils.py @@ -447,11 +447,8 @@ def dump(schema, d): dict serialized and validated dictionary """ + errors=schema.validate(d) + if len(errors)>0: + raise mm.ValidationError(errors) - results = schema.dump(d) - if isinstance(results, tuple): - (results, errors) = results - if len(errors) > 0: - raise mm.ValidationError(errors) - - return results + return schema.dump(d) diff --git a/doc_requirements.txt b/doc_requirements.txt index 4c256e74..8e00ab27 100644 --- a/doc_requirements.txt +++ b/doc_requirements.txt @@ -2,6 +2,6 @@ sphinxcontrib-napoleon sphinxcontrib-programoutput sphinxcontrib-inlinesyntaxhighlight numpy -marshmallow==3.0.0rc4 +marshmallow==3.0.0rc6 pytest rstcheck diff --git a/requirements.txt b/requirements.txt index 26770969..683a3e0c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ numpy -marshmallow==3.0.0rc4 pyyaml +marshmallow==3.0.0rc6 diff --git a/setup.py b/setup.py index 88099c9b..2922bf08 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,19 @@ from setuptools import setup, find_packages +import 
sys with open('requirements.txt', 'r') as f: required = f.read().splitlines() with open('test_requirements.txt', 'r') as f: test_required = f.read().splitlines() + if sys.platform != "win32": + test_required = [i for i in test_required if 'pywin32' not in i] setup(name='argschema', - version='2.0.0a1', + version='2.0.1', description=' a wrapper for setting up modules that can have parameters specified by command line arguments,\ json_files, or dictionary objects. Providing a common wrapper for data processing modules.', - author='Forrest Collman,David Feng', + author='Forrest Collman, David Feng', author_email='forrestc@alleninstitute.org', packages=find_packages(), url='https://github.com/AllenInstitute/argschema', diff --git a/test/fields/test_files.py b/test/fields/test_files.py index 7921ec2a..53c6a837 100644 --- a/test/fields/test_files.py +++ b/test/fields/test_files.py @@ -1,8 +1,13 @@ import pytest from argschema import ArgSchemaParser, ArgSchema from argschema.fields import InputFile, OutputFile, InputDir, OutputDir +from argschema.fields.files import OutputDirModeException import marshmallow as mm import os +import sys +if sys.platform == "win32": + import win32security + import ntsecuritycon as con # OUTPUT FILE TESTS @@ -22,12 +27,22 @@ class BasicOutputFile(ArgSchema): def test_outputfile_no_write(tmpdir): outdir = tmpdir.mkdir('cannot_write_here') - outdir.chmod(0o444) + if sys.platform == "win32": + sd = win32security.GetFileSecurity(str(outdir), win32security.DACL_SECURITY_INFORMATION) + everyone, domain, type = win32security.LookupAccountName ("", "Everyone") + dacl = win32security.ACL () + dacl.AddAccessAllowedAce (win32security.ACL_REVISION, con.FILE_GENERIC_READ, everyone) + sd.SetSecurityDescriptorDacl (1, dacl, 0) + win32security.SetFileSecurity (str(outdir), win32security.DACL_SECURITY_INFORMATION, sd) + else: + outdir.chmod(0o444) outfile = outdir.join('test') + with pytest.raises(mm.ValidationError): ArgSchemaParser(input_data={'output_file': str(outfile)}, - schema_type=BasicOutputFile) - outdir.chmod(0o666) + schema_type=BasicOutputFile, args=[]) + if sys.platform != "win32": + outdir.chmod(0o666) def test_outputfile_not_a_path(): @@ -81,10 +96,17 @@ def test_output_dir_basic(tmpdir): input_data=output_dir_example, args=[]) - def test_output_dir_bad_permission(tmpdir): - outdir = tmpdir.mkdir('no_write') - outdir.chmod(0o222) + outdir = tmpdir.mkdir('no_read') + if sys.platform == "win32": + sd = win32security.GetFileSecurity(str(outdir), win32security.DACL_SECURITY_INFORMATION) + everyone, domain, type = win32security.LookupAccountName ("", "Everyone") + dacl = win32security.ACL () + dacl.AddAccessAllowedAce (win32security.ACL_REVISION, con.FILE_GENERIC_WRITE, everyone) + sd.SetSecurityDescriptorDacl (1, dacl, 0) + win32security.SetFileSecurity (str(outdir), win32security.DACL_SECURITY_INFORMATION, sd) + else: + outdir.chmod(0o222) output_dir_example = { 'output_dir': outdir } @@ -103,13 +125,22 @@ def test_output_dir_bad_location(): input_data=output_dir_example, args=[]) +if sys.platform != "win32": + class ModeOutputDirSchema(ArgSchema): + output_dir = OutputDir(required=True, + description="775 output directory", + mode=0o775) + -class ModeOutputDirSchema(ArgSchema): - output_dir = OutputDir(required=True, - description="775 output directory", - mode=0o775) +@pytest.mark.skipif(sys.platform != "win32", reason="no general support for chmod octal in windows") +def test_windows_outdir_mode_fail(): + with pytest.raises(OutputDirModeException): + output_dir = 
OutputDir(required=True, + description="775 output directory", + mode=0o775) +@pytest.mark.skipif(sys.platform == "win32", reason="no general support for chmod octal in windows") def test_mode_output_osdir(tmpdir): outdir = tmpdir.join('mytmp') output_dir_example = { @@ -121,6 +152,7 @@ def test_mode_output_osdir(tmpdir): assert((os.stat(mod.args['output_dir']).st_mode & 0o777) == 0o775) +@pytest.mark.skipif(sys.platform == "win32", reason="no general support for chmod octal in windows") def test_failed_mode(tmpdir): outdir = tmpdir.join('mytmp_failed') os.makedirs(str(outdir)) @@ -163,7 +195,21 @@ def test_relative_file_input_failed(): def test_access_inputfile_failed(): with open(input_file_example['input_file'], 'w') as fp: fp.write('test') - os.chmod(input_file_example['input_file'], 0o222) + + if sys.platform == "win32": + sd = win32security.GetFileSecurity( + input_file_example['input_file'], + win32security.DACL_SECURITY_INFORMATION) + everyone, domain, type = win32security.LookupAccountName ("", "Everyone") + dacl = win32security.ACL () + dacl.AddAccessAllowedAce (win32security.ACL_REVISION, con.FILE_GENERIC_WRITE, everyone) + sd.SetSecurityDescriptorDacl (1, dacl, 0) + win32security.SetFileSecurity ( + input_file_example['input_file'], + win32security.DACL_SECURITY_INFORMATION, sd) + else: + os.chmod(input_file_example['input_file'], 0o222) + with pytest.raises(mm.ValidationError): ArgSchemaParser( input_data=input_file_example, schema_type=BasicInputFile, args=[]) @@ -192,10 +238,21 @@ def test_bad_inputdir(): ArgSchemaParser(input_data=input_data, schema_type=BasicInputDir, args=[]) - def test_inputdir_no_access(tmpdir): input_dir = tmpdir.mkdir('no_access') - input_dir.chmod(0o222) + if sys.platform == "win32": + sd = win32security.GetFileSecurity( + str(input_dir), + win32security.DACL_SECURITY_INFORMATION) + everyone, domain, type = win32security.LookupAccountName ("", "Everyone") + dacl = win32security.ACL () + dacl.AddAccessAllowedAce (win32security.ACL_REVISION, con.FILE_GENERIC_WRITE, everyone) + sd.SetSecurityDescriptorDacl (1, dacl, 0) + win32security.SetFileSecurity ( + str(input_dir), + win32security.DACL_SECURITY_INFORMATION, sd) + else: + input_dir.chmod(0o222) input_data = { 'input_dir': str(input_dir) } diff --git a/test/test_argschema_parser.py b/test/test_argschema_parser.py index dd8c56b7..f1afd3e6 100644 --- a/test/test_argschema_parser.py +++ b/test/test_argschema_parser.py @@ -53,7 +53,7 @@ def test_my_default_nested_parser(): } argschema.ArgSchemaParser(input_data=input_data, schema_type=MySchema2, - args=None) + args=[]) @pytest.mark.parametrize("default,args,expected", [ diff --git a/test/test_cli_overrides.py b/test/test_cli_overrides.py index d2240908..e130b887 100644 --- a/test/test_cli_overrides.py +++ b/test/test_cli_overrides.py @@ -48,7 +48,6 @@ def test_data(inputdir, inputfile, outputdir, outputfile): "inputfile": str(inputfile), "integer": 10, "list": [300, 200, 800, 1000], - "localdatetime": "0001-01-01T00:00:00", "log_level": "ERROR", "nested": {"a": 1, "b": False}, "number": 5.5, @@ -82,8 +81,7 @@ class MySchema(ArgSchema): inputdir = fields.InputDir(required=True) inputfile = fields.InputFile(required=True) integer = fields.Int(required=True) - list = fields.List(fields.Int, required=True) - localdatetime = fields.LocalDateTime(required=True) + list = fields.List(fields.Int, required=True, cli_as_single_argument=True) nested = fields.Nested(MyNestedSchema, required=True) number = fields.Number(required=True) numpyarray = 
fields.NumpyArray(dtype="uint8", required=True) @@ -212,13 +210,24 @@ def test_override_list(test_data): args=["--list", "invalid"]) -def test_override_localdatetime(test_data): - mod = ArgSchemaParser(test_data, schema_type=MySchema, - args=["--localdatetime", "1977-05-04T00:00:00"]) - assert(mod.args["localdatetime"] == datetime.datetime(1977, 5, 4, 0, 0, 0)) - with pytest.raises(mm.ValidationError): - mod = ArgSchemaParser(test_data, schema_type=MySchema, - args=["--localdatetime", "invalid"]) +# def test_override_list_deprecated(deprecated_data): +# with pytest.warns(FutureWarning): +# mod = ArgSchemaParser(deprecated_data, schema_type=MyDeprecatedSchema, +# args=["--list_deprecated", "1000", "3000"]) +# assert(mod.args["list_deprecated"] == [1000, 3000]) +# with pytest.raises(mm.ValidationError): +# mod = ArgSchemaParser(deprecated_data, +# schema_type=MyDeprecatedSchema, +# args=["--list_deprecated", "[1000,3000]"]) + + +# def test_override_localdatetime(test_data): +# mod = ArgSchemaParser(test_data, schema_type=MySchema, +# args=["--localdatetime", "1977-05-04T00:00:00"]) +# assert(mod.args["localdatetime"] == datetime.datetime(1977, 5, 4, 0, 0, 0)) +# with pytest.raises(mm.ValidationError): +# mod = ArgSchemaParser(test_data, schema_type=MySchema, +# args=["--localdatetime", "invalid"]) def test_override_log_level(test_data): diff --git a/test/test_first_test.py b/test/test_first_test.py index 9aa70b91..b918085b 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -252,3 +252,22 @@ def test_simple_description(): } argschema.ArgSchemaParser( input_data=d, schema_type=MyShorterExtension) + +class MySchemaPostLoad(ArgSchema): + xid = argschema.fields.Int(required=True) + + @mm.post_load + def my_post(self, data): + return data + +class MyPostLoadClass(ArgSchemaParser): + default_schema = MySchemaPostLoad + def run(self): + print(self.args) + +def test_post_load_schema(): + example1 = { + 'xid': 1, + } + mb = MyPostLoadClass(input_data=example1, args=[]) + mb.run() diff --git a/test/test_utils.py b/test/test_utils.py index d47391f0..59a44c3b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -117,14 +117,14 @@ def test_schema_argparser_with_baseball(): 'name': 'Roger Clemens', 'number': 21 }, - 'bases_occupied': [1, 2, 3], - 'outs': 2, - 'strikes': 2, - 'balls': 3, - 'inning': 9, - 'bottom': True, - 'score_home': 2, - 'score_away': 3 + 'bases_occupied':[1, 2, 3], + 'outs':2, + 'strikes':2, + 'balls':3, + 'inning':9, + 'bottom':True, + 'score_home':2, + 'score_away':3 } schema = BaseballSituation() ArgSchemaParser(input_data=example_situation, diff --git a/test/test_validate.py b/test/test_validate.py index 568c99ab..4220d310 100644 --- a/test/test_validate.py +++ b/test/test_validate.py @@ -63,6 +63,6 @@ def test_shape_call(validation_shape, input_array): def test_parser_validation(input_dict, raises): if raises: with pytest.raises(mm.ValidationError): - ArgSchemaParser(input_data=input_dict, schema_type=MySchema) + ArgSchemaParser(input_data=input_dict, schema_type=MySchema, args=[]) else: - ArgSchemaParser(input_data=input_dict, schema_type=MySchema) + ArgSchemaParser(input_data=input_dict, schema_type=MySchema, args=[]) diff --git a/test_requirements.txt b/test_requirements.txt index 82527cc6..c8611145 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,4 +1,4 @@ -coverage>=4.0 +coverage==4.5.4 mock>=2.0.0 pytest>=3.0.7 pep8>=1.7.0 @@ -9,4 +9,5 @@ pylint>=1.5.4 flake8>=3.0.4 rstcheck sphinx -requests \ No newline at end of file +requests 
+pywin32 From 16232cbc2d50e6067c2186708d7dd8b195b12365 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Fri, 21 Feb 2020 15:14:18 -0800 Subject: [PATCH 77/86] runtime input source selection --- argschema/__init__.py | 6 +- argschema/argschema_parser.py | 163 ++++++++++++++------- argschema/sources/__init__.py | 4 +- argschema/sources/json_source.py | 6 +- argschema/sources/source.py | 87 +++-------- argschema/sources/yaml_source.py | 6 +- test/_sources/conftest.py | 2 + test/{sources => _sources}/test_classes.py | 0 test/{sources => _sources}/test_json.py | 15 +- test/{sources => _sources}/test_url.py | 0 test/{sources => _sources}/test_yaml.py | 0 test/{sources => _sources}/url_source.py | 0 test/sources/test_parser_integration.py | 100 +++++++++++++ test/test_argschema_parser.py | 2 +- test/test_first_test.py | 2 +- 15 files changed, 255 insertions(+), 138 deletions(-) create mode 100644 test/_sources/conftest.py rename test/{sources => _sources}/test_classes.py (100%) rename test/{sources => _sources}/test_json.py (63%) rename test/{sources => _sources}/test_url.py (100%) rename test/{sources => _sources}/test_yaml.py (100%) rename test/{sources => _sources}/url_source.py (100%) create mode 100644 test/sources/test_parser_integration.py diff --git a/argschema/__init__.py b/argschema/__init__.py index 1148e215..7967d8a8 100644 --- a/argschema/__init__.py +++ b/argschema/__init__.py @@ -1,7 +1,7 @@ '''argschema: flexible definition, validation and setting of parameters''' -from .fields import InputFile, InputDir, OutputFile # noQA:F401 -from .schemas import ArgSchema # noQA:F401 -from .argschema_parser import ArgSchemaParser # noQA:F401 +from argschema.fields import InputFile, InputDir, OutputFile # noQA:F401 +from argschema.schemas import ArgSchema # noQA:F401 +from argschema.argschema_parser import ArgSchemaParser # noQA:F401 __version__ = "2.0.1" diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index d446d663..b10dca26 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -1,13 +1,19 @@ '''Module that contains the base class ArgSchemaParser which should be subclassed when using this library ''' +from typing import List, Sequence, Dict, Optional, Union import logging from . import schemas from . 
import utils
 import marshmallow as mm
 from .sources.json_source import JsonSource, JsonSink
 from .sources.yaml_source import YamlSource, YamlSink
-from .sources.source import NotConfiguredSourceError, MultipleConfiguredSourceError, get_input_from_config
+from .sources.source import (
+    ArgSource,
+    ArgSink,
+    NotConfiguredSourceError,
+    MultipleConfiguredSourceError,
+)
 
 
 class ArgSchemaParser(object):
@@ -26,9 +32,9 @@ class ArgSchemaParser(object):
         the schema to use to validate the parameters
     output_schema_type : marshmallow.Schema
         the schema to use to validate the output, used by self.output
-    input_source : argschema.sources.source.Source
-        a generic source of a dictionary
-    output_sink : argschema.sources.source.Source
+    input_sources : Sequence[argschema.sources.source.ConfigurableSource]
+        Each of these will be considered as a candidate source of input parameters
+    output_sinks : Sequence[argschema.sources.source.ConfigurableSource]
         a generic sink to write output dictionary to
     args : list or None
         command line arguments passed to the module, if None use argparse to parse the command line,
@@ -45,16 +51,38 @@ class ArgSchemaParser(object):
     """
     default_schema = schemas.ArgSchema
     default_output_schema = None
-    default_configurable_sources = [JsonSource]
-    default_configurable_sinks = [JsonSink]
+    default_sources = (JsonSource,)
+    default_sinks = (JsonSink,)
+
+    @property
+    def input_sources(self) -> List[ArgSource]:
+        if not hasattr(self, "_input_sources"):
+            self._input_sources = []
+        return self._input_sources
+
+    @property
+    def output_sinks(self) -> List[ArgSink]:
+        if not hasattr(self, "_output_sinks"):
+            self._output_sinks = []
+        return self._output_sinks
+
+    @property
+    def io_schemas(self) -> List[mm.Schema]:
+        if not hasattr(self, "_io_schemas"):
+            self._io_schemas = []
+        return self._io_schemas
+
+    @io_schemas.setter
+    def io_schemas(self, schemas: List[mm.Schema]):
+        self._io_schemas = schemas
 
     def __init__(self,
                  input_data=None,  # dictionary input as option instead of --input_json
                  schema_type=None,  # schema for parsing arguments
                  output_schema_type=None,  # schema for parsing output_json
                  args=None,
-                 input_source=None,
-                 output_sink=None,
+                 input_sources=None,
+                 output_sinks=None,
                  logger_name=__name__):
 
         if schema_type is None:
@@ -66,51 +94,85 @@ def __init__(self,
         self.logger = self.initialize_logger(logger_name, 'WARNING')
         self.logger.debug('input_data is {}'.format(input_data))
 
-        # convert schema to argparse object
+        self.register_sources(input_sources)
+        self.register_sinks(output_sinks)
 
-        # consolidate a list of the input and output source
-        # command line configuration schemas
-        io_schemas = []
-        for in_cfg in self.default_configurable_sources:
-            io_schemas.append(in_cfg.ConfigSchema())
-        for out_cfg in self.default_configurable_sinks:
-            io_schemas.append(out_cfg.ConfigSchema())
+        argsdict = self.parse_command_line(args)
+        resolved_args = self.resolve_inputs(input_data, argsdict)
 
-        # build a command line parser from the input schemas and configurations
-        p = utils.schema_argparser(self.schema, io_schemas)
-        argsobj = p.parse_args(args)
-        argsdict = utils.args_to_dict(argsobj, [self.schema] + io_schemas)
+        self.output_sink = self.__get_output_sink_from_config(resolved_args)
+        self.args = self.load_schema_with_defaults(self.schema, resolved_args)
+
+        self.output_schema_type = output_schema_type
+        self.logger = self.initialize_logger(
+            logger_name, self.args.get('log_level'))
+
+    def register_sources(
+        self,
+        sources: Union[None, Sequence[ArgSource], ArgSource]
+    ):
+        """consolidate a list of the input and output 
source command line + configuration schemas + """ + + if isinstance(sources, (ArgSource, type)): + sources = [sources] + elif sources is None: + sources = self.default_sources + + + for source in sources: + if isinstance(source, type): + source = source() + self.io_schemas.append(source.schema) + self.input_sources.append(source) + + def register_sinks( + self, + sinks: Union[None, Sequence[ArgSink], ArgSink] + ): + """ + """ + + if isinstance(sinks, (ArgSink, type)): + sinks = [sinks] + elif sinks is None: + sinks = self.default_sinks + + for sink in sinks: + if isinstance(sink, type): + sink = sink() + self.io_schemas.append(sink.schema) + self.output_sinks.append(sink) + + def parse_command_line(self, args: Optional[List]): + """ build a command line parser from the input schemas and + configurations + """ + parser = utils.schema_argparser(self.schema, self.io_schemas) + argsobj = parser.parse_args(args) + argsdict = utils.args_to_dict(argsobj, [self.schema] + self.io_schemas) self.logger.debug('argsdict is {}'.format(argsdict)) + return argsdict + + def resolve_inputs(self, input_data: Dict, argsdict: Dict) -> Dict: + """ Resolve input source by checking candidate sources against + constructor and command line arguments + """ - # if you received an input_source, get the dictionary from there - if input_source is not None: - input_data = input_source.get_dict() - else: # see if the input_data itself contains an InputSource configuration use that - config_data = self.__get_input_data_from_config(input_data) - input_data = config_data if config_data is not None else input_data + config_data = self.__get_input_data_from_config(input_data) + if config_data is not None: + input_data = config_data - # check whether the command line arguments contain an input configuration and use that config_data = self.__get_input_data_from_config( utils.smart_merge({}, argsdict)) - input_data = config_data if config_data is not None else input_data + if config_data is not None: + input_data = config_data - # merge the command line dictionary into the input json args = utils.smart_merge(input_data, argsdict) self.logger.debug('args after merge {}'.format(args)) - # if the output sink was not passed in, see if there is a configuration in the combined args - if output_sink is None: - output_sink = self.__get_output_sink_from_config(args) - # save the output sink for later - self.output_sink = output_sink - - # validate with load! 
- result = self.load_schema_with_defaults(self.schema, args) - - self.args = result - self.output_schema_type = output_schema_type - self.logger = self.initialize_logger( - logger_name, self.args.get('log_level')) + return args def __get_output_sink_from_config(self, d): """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink @@ -132,14 +194,14 @@ def __get_output_sink_from_config(self, d): """ output_set = False output_sink = None - for OutputSink in self.default_configurable_sinks: + for sink in self.output_sinks: try: - output_config_d = OutputSink.get_config( - OutputSink.ConfigSchema, d) + sink.load_config(d) + if output_set: raise MultipleConfiguredSourceError( "more then one OutputSink configuration present in {}".format(d)) - output_sink = OutputSink(**output_config_d) + output_sink = sink output_set = True except NotConfiguredSourceError: pass @@ -166,9 +228,10 @@ def __get_input_data_from_config(self, d): """ input_set = False input_data = None - for InputSource in self.default_configurable_sources: + for source in self.input_sources: try: - input_data = get_input_from_config(InputSource, d) + source.load_config(d) + input_data = source.get_dict() if input_set: raise MultipleConfiguredSourceError( "more then one InputSource configuration present in {}".format(d)) @@ -286,5 +349,5 @@ def initialize_logger(name, log_level): class ArgSchemaYamlParser(ArgSchemaParser): - default_configurable_sources = [YamlSource] - default_configurable_sinks = [YamlSink] + default_sources = [YamlSource] + default_sinks = [YamlSink] diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py index 4a732b36..7d604108 100644 --- a/argschema/sources/__init__.py +++ b/argschema/sources/__init__.py @@ -1,2 +1,2 @@ -from .source import ArgSink, ArgSource -from .json_source import JsonSource, JsonSink +from argschema.sources.source import ArgSink, ArgSource +from argschema.sources.json_source import JsonSource, JsonSink diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 61ef02d0..12d517d7 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -20,17 +20,13 @@ class JsonSource(ArgSource): ConfigSchema = JsonInputConfigSchema def get_dict(self): - with open(self.input_json, 'r') as fp: + with open(self.config["input_json"], 'r') as fp: return json.load(fp,) class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema - def __init__(self, output_json=None, output_json_indent=None): - self.output_json = output_json - self.indent = output_json_indent - def put_dict(self, d): with open(self.output_json, 'w') as fp: json.dump(d, fp, indent=self.indent) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index e1154130..6670ecaf 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,3 +1,6 @@ +import abc +from typing import Dict + import marshmallow as mm @@ -64,82 +67,29 @@ def __init__(self, **kwargs): which will define the set of fields that are allowed (and their defaults) """ self.schema = self.ConfigSchema() - result = self.get_config(self.ConfigSchema, kwargs) - self.__dict__.update(result) - - @staticmethod - def get_config(ConfigSchema, d): - """A static method to get the proper validated configuration keyword arguments/dictionary - of a Configurable source from a dictionary + self.config = {} - Parameters - ---------- - ConfigSchema: marshmallow.Schema - a marshmallow schema that defines the configuration schema for this 
ConfigurableSource - d: dict - a dictionary that might contain a proper configuration of this schema - - Returns - ------- - dict - a dictionary of configuration values that has been properly deserialized and validated by - ConfigSchema - Raises - ------ - NotConfiguredSourceError - if the configation dictionary does not contain a configuration for this source - MisconfiguredSourceError - if the configuration dictionary contains a configuration but it is invalid + def load_config(self, candidate: Dict): """ - schema = ConfigSchema() - if not d_contains_any_fields(schema, d): + """ + + if candidate is None: + raise NotConfiguredSourceError("No data was provided") + + if not d_contains_any_fields(self.schema, candidate): raise NotConfiguredSourceError( - "This source is not present in \n {}".format(d)) - else: - try: - result = schema.load(d, unknown=mm.EXCLUDE) - except mm.ValidationError as e: - raise MisconfiguredSourceError( - "Source incorrectly configured\n {}".format(e)) - else: - return result + "This source is not present in \n {}".format(candidate)) + + try: + self.config = self.schema.load(candidate, unknown=mm.EXCLUDE) + except mm.ValidationError as e: + raise MisconfiguredSourceError( + "Source incorrectly configured\n {}".format(e)) class ArgSource(ConfigurableSource): def get_dict(self): """method that must be implemented to enable an ArgSource to return a dictionary""" - pass - - -def get_input_from_config(ArgSource, config_d): - """function to return the input dictionary from an ArgSource, given a configuration dictionary - - Parameters - ---------- - ArgSource: class(ArgSource) - The ArgSource class subclass that you want to get input from - config_d: a dictionary that might contain a configuration for this source - - Returns - ------- - dict - a dictionary returned by ArgSource.get_dict() after validating configuration - and instantiating an ArgSource instance - - Raises - ------ - NotConfiguredSourceError - if the configation dictionary does not contain a configuration for this source - MisconfiguredSourceError - if the configuration dictionary contains a configuration but it is invalid - """ - if config_d is not None: - input_config_d = ArgSource.get_config(ArgSource.ConfigSchema, config_d) - input_source = ArgSource(**input_config_d) - input_data = input_source.get_dict() - return input_data - else: - raise NotConfiguredSourceError('No dictionary provided') class ArgSink(ConfigurableSource): @@ -151,4 +101,3 @@ def put_dict(self, d): d: dict the dictionary to write """ - pass diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 571dd0cd..eb8f2dd0 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,5 +1,5 @@ import yaml -from .source import ArgSource, ArgSink +from argschema.sources.source import ArgSource, ArgSink import argschema import marshmallow as mm @@ -18,7 +18,7 @@ class YamlSource(ArgSource): ConfigSchema = YamlInputConfigSchema def get_dict(self): - with open(self.input_yaml, 'r') as fp: + with open(self.config["input_yaml"], 'r') as fp: return yaml.load(fp) @@ -26,5 +26,5 @@ class YamlSink(ArgSink): ConfigSchema = YamlOutputConfigSchema def put_dict(self, d): - with open(self.output_yaml, 'w') as fp: + with open(self.config["output_yaml"], 'w') as fp: yaml.dump(d, fp, default_flow_style=False) diff --git a/test/_sources/conftest.py b/test/_sources/conftest.py new file mode 100644 index 00000000..e8d71cba --- /dev/null +++ b/test/_sources/conftest.py @@ -0,0 +1,2 @@ +def 
pytest_ignore_collect(path, config): + return True \ No newline at end of file diff --git a/test/sources/test_classes.py b/test/_sources/test_classes.py similarity index 100% rename from test/sources/test_classes.py rename to test/_sources/test_classes.py diff --git a/test/sources/test_json.py b/test/_sources/test_json.py similarity index 63% rename from test/sources/test_json.py rename to test/_sources/test_json.py index 1acc001b..c9d55d5e 100644 --- a/test/sources/test_json.py +++ b/test/_sources/test_json.py @@ -18,12 +18,19 @@ def test_input_file(tmpdir_factory): } } with open(str(file_in),'w') as fp: - json.dump(input_data,fp) + json.dump(input_data, fp) return str(file_in) -def test_json_source(test_input_file): - source = JsonSource(input_json=test_input_file) - mod = MyParser(input_source= source, args=[]) +def test_json_source_input_data(test_input_file): + mod = MyParser( + input_sources=JsonSource(), + input_data={"input_json": test_input_file}, + args=[] + ) + +# def test_json_source(test_input_file): +# source = JsonSource(input_json=test_input_file) +# mod = MyParser(input_sources= source, args=) def test_json_source_command(test_input_file): mod = MyParser(args = ['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/sources/test_url.py b/test/_sources/test_url.py similarity index 100% rename from test/sources/test_url.py rename to test/_sources/test_url.py diff --git a/test/sources/test_yaml.py b/test/_sources/test_yaml.py similarity index 100% rename from test/sources/test_yaml.py rename to test/_sources/test_yaml.py diff --git a/test/sources/url_source.py b/test/_sources/url_source.py similarity index 100% rename from test/sources/url_source.py rename to test/_sources/url_source.py diff --git a/test/sources/test_parser_integration.py b/test/sources/test_parser_integration.py new file mode 100644 index 00000000..adc2ff24 --- /dev/null +++ b/test/sources/test_parser_integration.py @@ -0,0 +1,100 @@ +import json +import yaml + +import pytest + +import argschema +from argschema.sources.json_source import JsonSource +from argschema.sources.yaml_source import YamlSource +from argschema.sources.source import MultipleConfiguredSourceError + + +class MyNestedSchema(argschema.schemas.DefaultSchema): + one = argschema.fields.Int(required=True,description="nested integer") + two = argschema.fields.Boolean(required=True,description="a nested boolean") + +class MySchema(argschema.ArgSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") + nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") + +class MyOutputSchema(argschema.schemas.DefaultSchema): + a = argschema.fields.Int(required=True,description="parameter a") + b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") + +class MyParser(argschema.ArgSchemaParser): + default_schema = MySchema + +@pytest.fixture(scope='module') +def json_inp(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') + input_data = { + 'a':5, + 'nest':{ + 'one':7, + 'two':False + } + } + + with open(str(file_in),'w') as fp: + json.dump(input_data, fp) + + return str(file_in) + +@pytest.fixture(scope='module') +def yaml_inp(tmpdir_factory): + file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yaml') + input_data = { + 'a':6, + 'nest':{ + 'one':8, + 'two':False + } + } + + with 
open(str(file_in),'w') as fp: + yaml.dump(input_data, fp) + + return str(file_in) + + +@pytest.mark.parametrize("inp_sources", [ + JsonSource(), [JsonSource()], JsonSource, [JsonSource] +]) +def test_json_input_args(json_inp, inp_sources): + parser = MyParser( + input_sources=inp_sources, + args=["--input_json", + json_inp] + ) + + assert parser.args["a"] == 5 + + +@pytest.mark.parametrize("inp_sources", [ + JsonSource(), [JsonSource()], JsonSource, [JsonSource] +]) +def test_json_input_data(json_inp, inp_sources): + parser = MyParser( + input_sources=inp_sources, + input_data={"input_json":json_inp}, + args=[] + ) + + assert parser.args["a"] == 5 + + +def test_multisource_arg(yaml_inp): + parser = MyParser( + input_sources=[JsonSource, YamlSource], + args=["--input_yaml", yaml_inp] + ) + assert parser.args["a"] == 6 + + +def test_multisource_arg_conflict(json_inp, yaml_inp): + with pytest.raises(MultipleConfiguredSourceError): + parser = MyParser( + input_sources=[JsonSource, YamlSource], + args=["--input_yaml", yaml_inp, "--input_json", json_inp] + ) diff --git a/test/test_argschema_parser.py b/test/test_argschema_parser.py index f1afd3e6..b3c26346 100644 --- a/test/test_argschema_parser.py +++ b/test/test_argschema_parser.py @@ -87,7 +87,7 @@ def test_parser_output(tmpdir_factory): 'two': False } } - mod = MyParser(input_data=input_data) + mod = MyParser(input_data=input_data, args=[]) json_sink = argschema.sources.JsonSink(output_json=str(json_path), output_json_indent=2) mod.output(mod.args, sink=json_sink) with open(str(json_path), 'r') as jf: diff --git a/test/test_first_test.py b/test/test_first_test.py index b918085b..9672658b 100644 --- a/test/test_first_test.py +++ b/test/test_first_test.py @@ -251,7 +251,7 @@ def test_simple_description(): 'd': [1, 5, 4] } argschema.ArgSchemaParser( - input_data=d, schema_type=MyShorterExtension) + input_data=d, schema_type=MyShorterExtension, args=[]) class MySchemaPostLoad(ArgSchema): xid = argschema.fields.Int(required=True) From ca0e76e91fe1e98d1333f07b00be9f4605b84fcf Mon Sep 17 00:00:00 2001 From: nile graddis Date: Fri, 21 Feb 2020 17:37:42 -0800 Subject: [PATCH 78/86] tests for specific sources and sinks --- argschema/sources/json_source.py | 4 +- .../sources}/url_source.py | 22 ++--- test/_sources/conftest.py | 2 - test/_sources/test_classes.py | 14 --- test/_sources/test_json.py | 36 -------- test/_sources/test_url.py | 41 --------- test/_sources/test_yaml.py | 90 ------------------- test/sources/test_json.py | 27 ++++++ test/sources/test_parser_integration.py | 37 ++++++-- test/sources/test_url.py | 37 ++++++++ test/sources/test_yaml.py | 25 ++++++ 11 files changed, 133 insertions(+), 202 deletions(-) rename {test/_sources => argschema/sources}/url_source.py (64%) delete mode 100644 test/_sources/conftest.py delete mode 100644 test/_sources/test_classes.py delete mode 100644 test/_sources/test_json.py delete mode 100644 test/_sources/test_url.py delete mode 100644 test/_sources/test_yaml.py create mode 100644 test/sources/test_json.py create mode 100644 test/sources/test_url.py create mode 100644 test/sources/test_yaml.py diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 12d517d7..8dab3e95 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -28,5 +28,5 @@ class JsonSink(ArgSink): ConfigSchema = JsonOutputConfigSchema def put_dict(self, d): - with open(self.output_json, 'w') as fp: - json.dump(d, fp, indent=self.indent) + with open(self.config["output_json"], 
'w') as fp: + json.dump(d, fp, indent=self.config.get("output_json_indent", None)) diff --git a/test/_sources/url_source.py b/argschema/sources/url_source.py similarity index 64% rename from test/_sources/url_source.py rename to argschema/sources/url_source.py index 56e67808..48456d8b 100644 --- a/test/_sources/url_source.py +++ b/argschema/sources/url_source.py @@ -2,7 +2,6 @@ from argschema.schemas import DefaultSchema from argschema.fields import Str,Int from argschema import ArgSchemaParser -from test_classes import MySchema import requests try: from urllib.parse import urlunparse @@ -19,15 +18,16 @@ class UrlSource(ArgSource): ConfigSchema = UrlSourceConfig def get_dict(self): - if self.input_port is None: - netloc = self.input_host - else: - netloc = "{}:{}".format(self.input_host,self.input_port) - url = urlunparse((self.input_protocol,netloc,self.input_url,None,None,None)) + netloc = self.config["input_host"] + if self.config["input_port"] is not None: + netloc = "{}:{}".format(netloc, self.config["input_port"]) + + url = urlunparse(( + self.config["input_protocol"], + netloc, + self.config["input_url"], + None, None, None + )) + response = requests.get(url) return response.json() - - -class UrlArgSchemaParser(ArgSchemaParser): - default_configurable_sources = [UrlSource] - default_schema = MySchema diff --git a/test/_sources/conftest.py b/test/_sources/conftest.py deleted file mode 100644 index e8d71cba..00000000 --- a/test/_sources/conftest.py +++ /dev/null @@ -1,2 +0,0 @@ -def pytest_ignore_collect(path, config): - return True \ No newline at end of file diff --git a/test/_sources/test_classes.py b/test/_sources/test_classes.py deleted file mode 100644 index a9de6944..00000000 --- a/test/_sources/test_classes.py +++ /dev/null @@ -1,14 +0,0 @@ -import argschema - -class MyNestedSchema(argschema.schemas.DefaultSchema): - one = argschema.fields.Int(required=True,description="nested integer") - two = argschema.fields.Boolean(required=True,description="a nested boolean") - -class MySchema(argschema.ArgSchema): - a = argschema.fields.Int(required=True,description="parameter a") - b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") - nest = argschema.fields.Nested(MyNestedSchema,description="a nested schema") - -class MyOutputSchema(argschema.schemas.DefaultSchema): - a = argschema.fields.Int(required=True,description="parameter a") - b = argschema.fields.Str(required=False,default="my value",description="optional b string parameter") \ No newline at end of file diff --git a/test/_sources/test_json.py b/test/_sources/test_json.py deleted file mode 100644 index c9d55d5e..00000000 --- a/test/_sources/test_json.py +++ /dev/null @@ -1,36 +0,0 @@ -import argschema -from argschema.sources.json_source import JsonSource -from test_classes import MySchema -import json -import pytest - -class MyParser(argschema.ArgSchemaParser): - default_schema = MySchema - -@pytest.fixture(scope='module') -def test_input_file(tmpdir_factory): - file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') - input_data = { - 'a':5, - 'nest':{ - 'one':7, - 'two':False - } - } - with open(str(file_in),'w') as fp: - json.dump(input_data, fp) - return str(file_in) - -def test_json_source_input_data(test_input_file): - mod = MyParser( - input_sources=JsonSource(), - input_data={"input_json": test_input_file}, - args=[] - ) - -# def test_json_source(test_input_file): -# source = JsonSource(input_json=test_input_file) -# mod = MyParser(input_sources= source, 
args=) - -def test_json_source_command(test_input_file): - mod = MyParser(args = ['--input_json',test_input_file]) \ No newline at end of file diff --git a/test/_sources/test_url.py b/test/_sources/test_url.py deleted file mode 100644 index f90cf9b3..00000000 --- a/test/_sources/test_url.py +++ /dev/null @@ -1,41 +0,0 @@ -import requests -import mock -from url_source import UrlArgSchemaParser, UrlSource, MySchema -from argschema import ArgSchemaParser -# This method will be used by the mock to replace requests.get - - -def mocked_requests_get(*args, **kwargs): - class MockResponse: - def __init__(self, json_data, status_code): - self.json_data = json_data - self.status_code = status_code - - def json(self): - return self.json_data - - if args[0] == 'http://localhost:88/test.json': - return MockResponse({ - 'a': 7, - 'nest': { - 'one': 7, - 'two': False - } - }, 200) - return MockResponse(None, 404) - - -@mock.patch('requests.get', side_effect=mocked_requests_get) -def test_url_parser(mock_get): - input_source = UrlSource(input_host='localhost', - input_port=88, input_url='test.json') - mod = ArgSchemaParser(schema_type=MySchema, - input_source=input_source, args=[]) - assert(mod.args['a'] == 7) - - -@mock.patch('requests.get', side_effect=mocked_requests_get) -def test_url_parser_command_line(mock_get): - mod = UrlArgSchemaParser( - args=['--input_host', 'localhost', '--input_port', '88', '--input_url', 'test.json']) - assert(mod.args['a'] == 7) diff --git a/test/_sources/test_yaml.py b/test/_sources/test_yaml.py deleted file mode 100644 index 96c0bce2..00000000 --- a/test/_sources/test_yaml.py +++ /dev/null @@ -1,90 +0,0 @@ -import argschema -from argschema.sources.yaml_source import YamlSource, YamlSink -from argschema.sources.json_source import JsonSource, JsonSink -from argschema.sources.source import MultipleConfiguredSourceError -from argschema.argschema_parser import ArgSchemaYamlParser -from test_classes import MySchema, MyOutputSchema -import yaml -import pytest -import json - -class MyParser(ArgSchemaYamlParser): - default_schema = MySchema - default_output_schema = MyOutputSchema - -class MyDualParser(MyParser): - default_configurable_sources = [JsonSource, YamlSource] - default_configurable_sinks = [JsonSink, YamlSink] - -input_data = { - 'a': 5, - 'nest': { - 'one': 7, - 'two': False - } -} - -@pytest.fixture(scope='module') -def test_yaml_input_file(tmpdir_factory): - file_in = tmpdir_factory.mktemp('test').join('test_input_yaml.yml') - - with open(str(file_in), 'w') as fp: - yaml.dump(input_data, fp, default_flow_style=False) - return str(file_in) - -@pytest.fixture(scope='module') -def test_json_input_file(tmpdir_factory): - file_in = tmpdir_factory.mktemp('test').join('test_input_json.json') - - with open(str(file_in), 'w') as fp: - json.dump(input_data, fp) - return str(file_in) - - -def test_yaml_source(test_yaml_input_file): - source = YamlSource(input_yaml=test_yaml_input_file) - mod = MyParser(input_source=source, args=[]) - - -def test_yaml_source_command(test_yaml_input_file): - mod = MyParser(args=['--input_yaml', test_yaml_input_file]) - - -def test_yaml_sink(test_yaml_input_file, tmpdir): - outfile = tmpdir.join('test_out.yml') - output_data = { - 'a': 3 - } - source = YamlSource(input_yaml=test_yaml_input_file) - sink = YamlSink(output_yaml = str(outfile)) - mod = MyParser(input_source=source, - output_sink=sink) - mod.output(output_data) - - with open(str(outfile), 'r') as fp: - d = yaml.load(fp) - output_data['b'] = "my value" - assert (output_data == d) - 
-def test_dual_parser(test_json_input_file,test_yaml_input_file): - - mod = MyDualParser(args=['--input_yaml', test_yaml_input_file]) - assert mod.args['a']==5 - assert mod.args['nest']==input_data['nest'] - - mod = MyDualParser(args=['--input_json', test_json_input_file]) - assert mod.args['a']==5 - assert mod.args['nest']==input_data['nest'] - -def test_dual_parser_fail(test_json_input_file,test_yaml_input_file): - with pytest.raises(MultipleConfiguredSourceError): - mod = MyDualParser(args=['--input_yaml', test_yaml_input_file, '--input_json', test_json_input_file]) - -def test_dual_parser_output_fail(test_json_input_file,tmpdir): - test_json_output = str(tmpdir.join('output.yml')) - test_yaml_output = str(tmpdir.join('output.json')) - with pytest.raises(MultipleConfiguredSourceError): - mod = MyDualParser(args=['--input_json', test_json_input_file, - '--output_json',test_json_output, - '--output_yaml',test_yaml_output]) - \ No newline at end of file diff --git a/test/sources/test_json.py b/test/sources/test_json.py new file mode 100644 index 00000000..08bbb8d0 --- /dev/null +++ b/test/sources/test_json.py @@ -0,0 +1,27 @@ +import json + +import pytest + +from argschema.sources import json_source + + +def test_json_source_get_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_json_source").join("inp.json")) + + with open(path, "w") as jf: + json.dump({"a": 12}, jf) + + source = json_source.JsonSource() + source.load_config({"input_json": path}) + + assert source.get_dict()["a"] == 12 + +def test_json_sink_put_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_json_source").join("out.json")) + + sink = json_source.JsonSink() + sink.load_config({"output_json": path}) + sink.put_dict({"a": 13}) + + with open(path, "r") as jf: + assert json.load(jf)["a"] == 13 \ No newline at end of file diff --git a/test/sources/test_parser_integration.py b/test/sources/test_parser_integration.py index adc2ff24..85b82d31 100644 --- a/test/sources/test_parser_integration.py +++ b/test/sources/test_parser_integration.py @@ -1,11 +1,12 @@ import json -import yaml +import os import pytest +import yaml import argschema -from argschema.sources.json_source import JsonSource -from argschema.sources.yaml_source import YamlSource +from argschema.sources.json_source import JsonSource, JsonSink +from argschema.sources.yaml_source import YamlSource, YamlSink from argschema.sources.source import MultipleConfiguredSourceError @@ -70,7 +71,6 @@ def test_json_input_args(json_inp, inp_sources): assert parser.args["a"] == 5 - @pytest.mark.parametrize("inp_sources", [ JsonSource(), [JsonSource()], JsonSource, [JsonSource] ]) @@ -83,7 +83,6 @@ def test_json_input_data(json_inp, inp_sources): assert parser.args["a"] == 5 - def test_multisource_arg(yaml_inp): parser = MyParser( input_sources=[JsonSource, YamlSource], @@ -91,10 +90,36 @@ def test_multisource_arg(yaml_inp): ) assert parser.args["a"] == 6 - def test_multisource_arg_conflict(json_inp, yaml_inp): with pytest.raises(MultipleConfiguredSourceError): parser = MyParser( input_sources=[JsonSource, YamlSource], args=["--input_yaml", yaml_inp, "--input_json", json_inp] ) + +def test_multisink(yaml_inp): + out_path = os.path.join(os.path.dirname(yaml_inp), "out.json") + + parser = MyParser( + output_schema_type=MyOutputSchema, + input_sources=YamlSource, + output_sinks=[YamlSink, JsonSink], + args=["--input_yaml", yaml_inp, "--output_json", out_path] + ) + + parser.output({"a": 12, "b": "16"}) + with open(out_path, "r") as out_file: + obt = 
json.load(out_file) + assert obt["a"] == 12 + +def test_multisink_conflicting(yaml_inp, json_inp): + yaml_out = os.path.join(os.path.dirname(yaml_inp), "out.yaml") + json_out = os.path.join(os.path.dirname(json_inp), "out.json") + + with pytest.raises(MultipleConfiguredSourceError): + parser = MyParser( + output_schema_type=MyOutputSchema, + input_sources=[YamlSource], + output_sinks=[JsonSink, YamlSink], + args=["--output_yaml", yaml_out, "--output_json", json_out] + ) diff --git a/test/sources/test_url.py b/test/sources/test_url.py new file mode 100644 index 00000000..b57b0daa --- /dev/null +++ b/test/sources/test_url.py @@ -0,0 +1,37 @@ +import requests +import mock +from argschema.sources.url_source import UrlSource +from argschema import ArgSchemaParser + + +def mocked_requests_get(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + if args[0] == 'http://localhost:88/test.json': + return MockResponse({ + 'a': 7, + 'nest': { + 'one': 7, + 'two': False + } + }, 200) + return MockResponse(None, 404) + + +@mock.patch('requests.get', side_effect=mocked_requests_get) +def test_url_parser_get_dict(mock_get): + source = UrlSource() + source.load_config({ + "input_host": "localhost", + "input_port": 88, + "input_url": "test.json", + }) + + obtained = source.get_dict() + assert obtained["a"] == 7 \ No newline at end of file diff --git a/test/sources/test_yaml.py b/test/sources/test_yaml.py new file mode 100644 index 00000000..bad20703 --- /dev/null +++ b/test/sources/test_yaml.py @@ -0,0 +1,25 @@ +import pytest +import yaml + +from argschema.sources import yaml_source + +def test_json_source_get_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_yaml_source").join("inp.yaml")) + + with open(path, "w") as jf: + yaml.dump({"a": 12}, jf) + + source = yaml_source.YamlSource() + source.load_config({"input_yaml": path}) + + assert source.get_dict()["a"] == 12 + +def test_json_sink_put_dict(tmpdir_factory): + path = str(tmpdir_factory.mktemp("test_yaml_source").join("out.yaml")) + + sink = yaml_source.YamlSink() + sink.load_config({"output_yaml": path}) + sink.put_dict({"a": 13}) + + with open(path, "r") as jf: + assert yaml.load(jf)["a"] == 13 \ No newline at end of file From 1d414f3ccf1c1eebe601ab566f315f58d35866de Mon Sep 17 00:00:00 2001 From: nile graddis Date: Mon, 24 Feb 2020 19:02:24 -0800 Subject: [PATCH 79/86] docstrings for config source; some renaming --- argschema/argschema_parser.py | 157 ++++++++++++++++-------- argschema/sources/__init__.py | 2 +- argschema/sources/json_source.py | 20 ++- argschema/sources/source.py | 118 ++++++++++++------ argschema/sources/url_source.py | 7 +- argschema/sources/yaml_source.py | 18 ++- test/sources/test_parser_integration.py | 6 +- 7 files changed, 224 insertions(+), 104 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index b10dca26..6a96e808 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ -1,7 +1,7 @@ '''Module that contains the base class ArgSchemaParser which should be subclassed when using this library ''' -from typing import List, Sequence, Dict, Optional, Union +from typing import List, Sequence, Dict, Optional, Union, Tuple, Type, TypeVar import logging from . import schemas from . 
import utils @@ -9,13 +9,27 @@ from .sources.json_source import JsonSource, JsonSink from .sources.yaml_source import YamlSource, YamlSink from .sources.source import ( - ArgSource, - ArgSink, - NotConfiguredSourceError, - MultipleConfiguredSourceError, + ConfigurableSource, + ConfigurableSink, + NonconfigurationError, + MultipleConfigurationError, ) +SourceType = Union[ConfigurableSource, Type[ConfigurableSource]] +RegistrableSources = Union[ + None, + SourceType, + Sequence[SourceType], +] +SinkType = Union[ConfigurableSink, Type[ConfigurableSink]] +RegistrableSinks = Union[ + None, + SinkType, + Sequence[SinkType], +] + + class ArgSchemaParser(object): """The main class you should sub-class to write your own argschema module. Takes input_data, reference to a input_json and the command line inputs and parses out the parameters @@ -33,9 +47,9 @@ class ArgSchemaParser(object): output_schema_type : marshmallow.Schema the schema to use to validate the output, used by self.output input_sources : Sequence[argschema.sources.source.ConfigurableSource] - Each of these will be considered + each of these will be considered as a potential source of input data output_sinks : Sequence[argschema.sources.source.ConfigurableSource] - a generic sink to write output dictionary to + each of these will be considered as a potential sink for output data args : list or None command line arguments passed to the module, if None use argparse to parse the command line, set to [] if you want to bypass command line parsing @@ -51,25 +65,25 @@ class ArgSchemaParser(object): """ default_schema = schemas.ArgSchema default_output_schema = None - default_sources = (JsonSource,) - default_sinks = (JsonSink,) + default_sources: Tuple[SourceType] = (JsonSource,) + default_sinks: Tuple[SinkType] = (JsonSink,) @property - def input_sources(self) -> List[ArgSource]: + def input_sources(self) -> List[ConfigurableSource]: if not hasattr(self, "_input_sources"): - self._input_sources = [] + self._input_sources: List[ConfigurableSource] = [] return self._input_sources @property - def output_sinks(self) -> List[ArgSource]: + def output_sinks(self) -> List[ConfigurableSink]: if not hasattr(self, "_output_sinks"): - self._output_sinks = [] + self._output_sinks: List[ConfigurableSink] = [] return self._output_sinks @property def io_schemas(self) -> List[mm.Schema]: if not hasattr(self, "_io_schemas"): - self._io_schemas = [] + self._io_schemas: List[mm.Schema] = [] return self._io_schemas @io_schemas.setter @@ -105,23 +119,31 @@ def __init__(self, self.output_schema_type = output_schema_type self.logger = self.initialize_logger( - logger_name, self.args.get('log_level')) + logger_name, self.args.get('log_level')) def register_sources( self, - sources: Union[None, Sequence[ArgSource], ArgSource] + sources: RegistrableSources ): - """consolidate a list of the input and output source command line - configuration schemas + """consolidate a list of the input source configuration schemas + + Parameters + ---------- + sources : + Each source will be registered (and may then be configured by data + passed to this parser). If None is argued, the default_sources + associated with this class will be registered. 
+ """ - if isinstance(sources, (ArgSource, type)): - sources = [sources] + if isinstance(sources, (ConfigurableSource, type)): + coerced_sources: Sequence[SourceType] = [sources] elif sources is None: - sources = self.default_sources - + coerced_sources = self.default_sources + else: + coerced_sources = sources - for source in sources: + for source in coerced_sources: if isinstance(source, type): source = source() self.io_schemas.append(source.schema) @@ -129,25 +151,48 @@ def register_sources( def register_sinks( self, - sinks: Union[None, Sequence[ArgSink], ArgSink] + sinks: RegistrableSinks ): - """ + """Consolidate a list of the output sink configuration schemas + + Parameters + ---------- + sinks : + Each sink will be registered (and may then be configured by data + passed to this parser). If None is argued, the default_sinks + associated with this class will be registered. + """ - if isinstance(sinks, (ArgSink, type)): - sinks = [sinks] + if isinstance(sinks, (ConfigurableSink, type)): + coerced_sinks: Sequence[SinkType] = [sinks] elif sinks is None: - sinks = self.default_sinks + coerced_sinks = self.default_sinks + else: + coerced_sinks = sinks - for sink in sinks: + for sink in coerced_sinks: if isinstance(sink, type): sink = sink() self.io_schemas.append(sink.schema) self.output_sinks.append(sink) - def parse_command_line(self, args: Optional[List]): - """ build a command line parser from the input schemas and - configurations + def parse_command_line(self, args: Optional[List[str]]) -> Dict: + """Build a command line parser from the input schemas and + configurations. Parse command line arguments using this parser + + Parameters + ---------- + args : list of str or None + Will be passed directly to argparse's parse_args. If None, sys.argv + will be used. If provided, should be formatted like: + ["positional_arg", "--optional_arg", "optional_value"] + + Returns + ------- + argsdict : dict + a (potentially nested) dictionary of parsed command line arguments + """ parser = utils.schema_argparser(self.schema, self.io_schemas) argsobj = parser.parse_args(args) @@ -158,6 +203,25 @@ def parse_command_line(self, args: Optional[List]): def resolve_inputs(self, input_data: Dict, argsdict: Dict) -> Dict: """ Resolve input source by checking candidate sources against constructor and command line arguments + + Parameters + ---------- + input_data : dict + Manually (on ArgschemaParser construction) specified parameters. + Will be overridden if values are successfully extracted from + argsdict. + argsdict : dict + Command line parameters, parsed into a nested dictionary. + + Returns + ------- + args : dict + A fully merged (possibly nested) collection of inputs. May draw from + 1. input data + 2. the argsdict + 3. 
any configurable sources whose config schemas are satisfied + by values in the above + """ config_data = self.__get_input_data_from_config(input_data) @@ -175,21 +239,21 @@ def resolve_inputs(self, input_data: Dict, argsdict: Dict) -> Dict: return args def __get_output_sink_from_config(self, d): - """private function to check for ArgSink configuration in a dictionary and return a configured ArgSink + """private function to check for ConfigurableSink configuration in a dictionary and return a configured ConfigurableSink Parameters ---------- d : dict - dictionary to look for ArgSink Configuration parameters in + dictionary to look for ConfigurableSink Configuration parameters in Returns ------- - ArgSink - A configured argsink + ConfigurableSink + A configured ConfigurableSink Raises ------ - MultipleConfiguredSourceError + MultipleConfigurationError If more than one Sink is configured """ output_set = False @@ -199,16 +263,16 @@ def __get_output_sink_from_config(self, d): sink.load_config(d) if output_set: - raise MultipleConfiguredSourceError( + raise MultipleConfigurationError( "more then one OutputSink configuration present in {}".format(d)) output_sink = sink output_set = True - except NotConfiguredSourceError: + except NonconfigurationError: pass return output_sink def __get_input_data_from_config(self, d): - """private function to check for ArgSource configurations in a dictionary + """private function to check for ConfigurableSource configurations in a dictionary and return the data if it exists Parameters @@ -223,7 +287,7 @@ def __get_input_data_from_config(self, d): Raises ------ - MultipleConfiguredSourceError + MultipleConfigurationError if more than one InputSource is configured """ input_set = False @@ -233,10 +297,10 @@ def __get_input_data_from_config(self, d): source.load_config(d) input_data = source.get_dict() if input_set: - raise MultipleConfiguredSourceError( + raise MultipleConfigurationError( "more then one InputSource configuration present in {}".format(d)) input_set = True - except NotConfiguredSourceError as e: + except NonconfigurationError as e: pass return input_data @@ -279,7 +343,7 @@ def output(self, d, sink=None): ---------- d:dict output dictionary to output - sink: argschema.sources.source.ArgSink + sink: argschema.sources.source.ConfigurableSink output_sink to output to (optional default to self.output_source) Raises @@ -346,8 +410,3 @@ def initialize_logger(name, log_level): logger = logging.getLogger(name) logger.setLevel(level=level) return logger - - -class ArgSchemaYamlParser(ArgSchemaParser): - default_sources = [YamlSource] - default_sinks = [YamlSink] diff --git a/argschema/sources/__init__.py b/argschema/sources/__init__.py index 7d604108..76bfa557 100644 --- a/argschema/sources/__init__.py +++ b/argschema/sources/__init__.py @@ -1,2 +1,2 @@ -from argschema.sources.source import ArgSink, ArgSource +from argschema.sources.source import ConfigurableSource, ConfigurableSink from argschema.sources.json_source import JsonSource, JsonSink diff --git a/argschema/sources/json_source.py b/argschema/sources/json_source.py index 8dab3e95..22ec191f 100644 --- a/argschema/sources/json_source.py +++ b/argschema/sources/json_source.py @@ -1,4 +1,4 @@ -from .source import ArgSource, ArgSink +from argschema.sources.source import ConfigurableSource, ConfigurableSink import json import marshmallow as mm import argschema @@ -16,7 +16,12 @@ class JsonOutputConfigSchema(mm.Schema): description='whether to indent options or not') -class JsonSource(ArgSource): +class 
JsonSource(ConfigurableSource): + """ A configurable source which reads values from a json. Expects + --input_json + to be specified. + """ + ConfigSchema = JsonInputConfigSchema def get_dict(self): @@ -24,9 +29,14 @@ def get_dict(self): return json.load(fp,) -class JsonSink(ArgSink): +class JsonSink(ConfigurableSink): + """ A configurable sink which writes values to a json. Expects + --output_json + to be specified. + """ ConfigSchema = JsonOutputConfigSchema - def put_dict(self, d): + def put_dict(self, data): with open(self.config["output_json"], 'w') as fp: - json.dump(d, fp, indent=self.config.get("output_json_indent", None)) + json.dump( + data, fp, indent=self.config.get("output_json_indent", None)) diff --git a/argschema/sources/source.py b/argschema/sources/source.py index 6670ecaf..0d4e49c3 100644 --- a/argschema/sources/source.py +++ b/argschema/sources/source.py @@ -1,103 +1,143 @@ import abc -from typing import Dict +from typing import Dict, Type import marshmallow as mm -class ConfigurableSourceError(mm.ValidationError): - """Base Exception class for configurable sources""" +class ConfigurationError(mm.ValidationError): + """Base Exception class for configurations""" pass -class MisconfiguredSourceError(ConfigurableSourceError): - """Exception when a source configuration was present in part but failed +class MisconfigurationError(ConfigurationError): + """Exception when a configuration was present in part but failed validation""" pass -class NotConfiguredSourceError(ConfigurableSourceError): - """Exception when the source configuration is simply completely missing""" +class NonconfigurationError(ConfigurationError): + """Exception when a configuration is simply completely missing""" pass -class MultipleConfiguredSourceError(ConfigurableSourceError): - """Exception when there is more than one validly configured Source configured""" +class MultipleConfigurationError(ConfigurationError): + """Exception when there is more than one valid configuration""" pass -def d_contains_any_fields(schema, d): +def d_contains_any_fields(schema: mm.Schema, data: Dict) -> bool: """function to test if a dictionary contains any elements of a schema Parameters ---------- schema: marshmallow.Schema a marshmallow schema to test d with - d: dict + data: dict the dictionary to test whether it contains any elements of a schema Returns ------- bool: - True/False whether d contains any elements of a schema. If a schema contains no elements, returns True + True/False whether d contains any elements of a schema. If a schema + contains no elements, returns True """ if len(schema.declared_fields) == 0: return True + for field_name, field in schema.declared_fields.items(): - if field_name in d.keys(): - if d[field_name] is not None: + if field_name in data.keys(): + if data[field_name] is not None: return True + return False -class ConfigSourceSchema(mm.Schema): - pass +class Configurable(object): + """Base class for sources and sinks of marshmallow-validatable + parameters. + + Parameters + ---------- + **default_config : dict + Optionally, attempt to load a config immediately upon construction + + Attributes + ---------- + ConfigSchema : type(mm.Schema), class attribute + Defines a schema for this Configurable's config. + config : dict + Stores for values loaded according to this instance's schema + schema : mm.Schema + An instance of this class's ConfigSchema. Used to validate potential + configurations. 
+ + """ + + ConfigSchema: Type[mm.Schema] = mm.Schema -class ConfigurableSource(object): - ConfigSchema = ConfigSourceSchema + def __init__(self, **default_config: Dict): - def __init__(self, **kwargs): - """Configurable source + self.schema: mm.Schema = self.ConfigSchema() + self.config: Dict = {} + + if default_config: + self.load_config(default_config) + + def load_config(self, candidate: Dict): + """Attempt to configure this object inplace using values in a candidate + dictionary. Parameters ---------- - **kwargs: dict - a set of keyword arguments which will be validated by this classes ConfigSchema - which will define the set of fields that are allowed (and their defaults) - """ - self.schema = self.ConfigSchema() - self.config = {} + candidate : dict + Might satisfy (and will be loaded using) this object's schema. + + Raises + ------ + NonconfigurationError : Indicates that the candidate was completely + inapplicable. + MisconfigurationError : Indicates that the candidate did not adequetly + satisfy this configurable's schema. - def load_config(self, candidate: Dict): - """ """ if candidate is None: - raise NotConfiguredSourceError("No data was provided") + candidate = {} if not d_contains_any_fields(self.schema, candidate): - raise NotConfiguredSourceError( + raise NonconfigurationError( "This source is not present in \n {}".format(candidate)) try: self.config = self.schema.load(candidate, unknown=mm.EXCLUDE) except mm.ValidationError as e: - raise MisconfiguredSourceError( + raise MisconfigurationError( "Source incorrectly configured\n {}".format(e)) -class ArgSource(ConfigurableSource): - def get_dict(self): - """method that must be implemented to enable an ArgSource to return a dictionary""" +class ConfigurableSource(Configurable): + def get_dict(self) -> Dict: + """Produces a dictionary, potentially using information from this + source's config. + + Returns + ------- + dict : Suitable for validatation by some external marshmallow schema. + """ + raise NotImplementedError() -class ArgSink(ConfigurableSource): - def put_dict(self, d): - """method that must be implemented to enable an ArgSink to write a dictionary + +class ConfigurableSink(Configurable): + def put_dict(self, data: Dict): + """Writes a dictionary, potentially using information from this + sink's config. Parameters ---------- - d: dict - the dictionary to write + dict : Will be written to some external sink. + """ + raise NotImplementedError() diff --git a/argschema/sources/url_source.py b/argschema/sources/url_source.py index 48456d8b..3a270255 100644 --- a/argschema/sources/url_source.py +++ b/argschema/sources/url_source.py @@ -1,4 +1,4 @@ -from argschema.sources import ArgSource, ArgSink +from argschema.sources import ConfigurableSource from argschema.schemas import DefaultSchema from argschema.fields import Str,Int from argschema import ArgSchemaParser @@ -14,7 +14,10 @@ class UrlSourceConfig(DefaultSchema): input_url = Str(required=True, description="location on host of input") input_protocol = Str(required=False, default='http', description="url protocol to use") -class UrlSource(ArgSource): +class UrlSource(ConfigurableSource): + """ A configurable source which obtains values by making a GET request, + expecting a JSON response. 
+ """ ConfigSchema = UrlSourceConfig def get_dict(self): diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index eb8f2dd0..878a6cc2 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -1,5 +1,5 @@ import yaml -from argschema.sources.source import ArgSource, ArgSink +from argschema.sources.source import ConfigurableSource, ConfigurableSink import argschema import marshmallow as mm @@ -14,7 +14,11 @@ class YamlOutputConfigSchema(mm.Schema): description='filepath to save output yaml') -class YamlSource(ArgSource): +class YamlSource(ConfigurableSource): + """ A configurable source which reads values from a yaml. Expects + --input_yaml + to be specified. + """ ConfigSchema = YamlInputConfigSchema def get_dict(self): @@ -22,9 +26,13 @@ def get_dict(self): return yaml.load(fp) -class YamlSink(ArgSink): +class YamlSink(ConfigurableSink): + """ A configurable sink which writes values to a yaml. Expects + --output_yaml + to be specified. + """ ConfigSchema = YamlOutputConfigSchema - def put_dict(self, d): + def put_dict(self, data): with open(self.config["output_yaml"], 'w') as fp: - yaml.dump(d, fp, default_flow_style=False) + yaml.dump(data, fp, default_flow_style=False) diff --git a/test/sources/test_parser_integration.py b/test/sources/test_parser_integration.py index 85b82d31..c607d28e 100644 --- a/test/sources/test_parser_integration.py +++ b/test/sources/test_parser_integration.py @@ -7,7 +7,7 @@ import argschema from argschema.sources.json_source import JsonSource, JsonSink from argschema.sources.yaml_source import YamlSource, YamlSink -from argschema.sources.source import MultipleConfiguredSourceError +from argschema.sources.source import MultipleConfigurationError class MyNestedSchema(argschema.schemas.DefaultSchema): @@ -91,7 +91,7 @@ def test_multisource_arg(yaml_inp): assert parser.args["a"] == 6 def test_multisource_arg_conflict(json_inp, yaml_inp): - with pytest.raises(MultipleConfiguredSourceError): + with pytest.raises(MultipleConfigurationError): parser = MyParser( input_sources=[JsonSource, YamlSource], args=["--input_yaml", yaml_inp, "--input_json", json_inp] @@ -116,7 +116,7 @@ def test_multisink_conflicting(yaml_inp, json_inp): yaml_out = os.path.join(os.path.dirname(yaml_inp), "out.yaml") json_out = os.path.join(os.path.dirname(json_inp), "out.json") - with pytest.raises(MultipleConfiguredSourceError): + with pytest.raises(MultipleConfigurationError): parser = MyParser( output_schema_type=MyOutputSchema, input_sources=[YamlSource], From 152212ba12831bf8b2e393290cee8f3c203c9ce4 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Mon, 24 Feb 2020 19:26:22 -0800 Subject: [PATCH 80/86] avoid security vulnerability in yaml sour e --- argschema/sources/yaml_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argschema/sources/yaml_source.py b/argschema/sources/yaml_source.py index 878a6cc2..3b6a90db 100644 --- a/argschema/sources/yaml_source.py +++ b/argschema/sources/yaml_source.py @@ -23,7 +23,7 @@ class YamlSource(ConfigurableSource): def get_dict(self): with open(self.config["input_yaml"], 'r') as fp: - return yaml.load(fp) + return yaml.load(fp, Loader=yaml.FullLoader) class YamlSink(ConfigurableSink): From ffdffc351161833209f7830a20f8a2bc68d7c011 Mon Sep 17 00:00:00 2001 From: nile graddis Date: Mon, 24 Feb 2020 19:26:43 -0800 Subject: [PATCH 81/86] add example of multisource argschema parsing --- examples/multisource_example.json | 6 ++++ examples/multisource_example.py | 48 
+++++++++++++++++++++++++++++++ examples/multisource_example.yaml | 3 ++ 3 files changed, 57 insertions(+) create mode 100644 examples/multisource_example.json create mode 100644 examples/multisource_example.py create mode 100644 examples/multisource_example.yaml diff --git a/examples/multisource_example.json b/examples/multisource_example.json new file mode 100644 index 00000000..49ad4497 --- /dev/null +++ b/examples/multisource_example.json @@ -0,0 +1,6 @@ +{ + "a_subschema": { + "an_int": 12 + }, + "a_float": 15.5 +} \ No newline at end of file diff --git a/examples/multisource_example.py b/examples/multisource_example.py new file mode 100644 index 00000000..7ac062cb --- /dev/null +++ b/examples/multisource_example.py @@ -0,0 +1,48 @@ +"""This example shows you how to register multiple input sources for your executable, which users can then select from dynamically when running it. This feature makes your code a bit more flexible about the format of the input parameters. + +There is a similar feature (not shown here) for specifying output sinks. It follows the same pattern. + +Usage +----- +# you can load parameters from a yaml ... +$ python examples/multisource_example.py --input_yaml examples/multisource_example.yaml +{'a_subschema': {'an_int': 13}, 'log_level': 'ERROR', 'a_float': 16.7} + +# ... or from an input json ... +$ python examples/multisource_example.py --input_json examples/multisource_example.json +{'a_float': 15.5, 'a_subschema': {'an_int': 12}, 'log_level': 'ERROR'} + +# ... but not both +$ python examples/multisource_example.py --input_json examples/multisource_example.json --input_yaml examples/multisource_example.yaml +argschema.sources.source.MultipleConfigurationError: more then one InputSource configuration present in {'input_json': 'examples/multisource_example.json', 'input_yaml': 'examples/multisource_example.yaml'} + +# command line parameters still override sourced ones +$ python examples/multisource_example.py --input_json examples/multisource_example.json --a_float 13.1 +{'a_float': 13.1, 'a_subschema': {'an_int': 12}, 'log_level': 'ERROR'} + +""" + +import argschema + +class SubSchema(argschema.schemas.DefaultSchema): + an_int = argschema.fields.Int() + +class MySchema(argschema.ArgSchema): + a_subschema = argschema.fields.Nested(SubSchema) + a_float = argschema.fields.Float() + + +def main(): + + parser = argschema.ArgSchemaParser( + schema_type=MySchema, + input_sources=[ # each source provided here will be checked against command-line arguments + argschema.sources.json_source.JsonSource, # ArgschemaParser includes this source by default + argschema.sources.yaml_source.YamlSource + ] + ) + + print(parser.args) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/multisource_example.yaml b/examples/multisource_example.yaml new file mode 100644 index 00000000..7323832d --- /dev/null +++ b/examples/multisource_example.yaml @@ -0,0 +1,3 @@ +a_subschema : + an_int: 13 +a_float: 16.7 \ No newline at end of file From 9dafe5f70d15150df09a092aefbe665689d250e8 Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Thu, 5 Mar 2020 15:30:44 -0800 Subject: [PATCH 82/86] add missing docstring types; raise if url_source response is bad --- argschema/argschema_parser.py | 4 ++-- argschema/sources/url_source.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/argschema/argschema_parser.py b/argschema/argschema_parser.py index 6a96e808..03b1ffce 100644 --- a/argschema/argschema_parser.py +++ b/argschema/argschema_parser.py @@ 
-129,7 +129,7 @@ def register_sources( Parameters ---------- - sources : + sources : (sequence of) ConfigurableSource or None Each source will be registered (and may then be configured by data passed to this parser). If None is argued, the default_sources associated with this class will be registered. @@ -157,7 +157,7 @@ def register_sinks( Parameters ---------- - sinks : + sinks : (sequence of) ConfigurableSink or None Each sink will be registered (and may then be configured by data passed to this parser). If None is argued, the default_sinks associated with this class will be registered. diff --git a/argschema/sources/url_source.py b/argschema/sources/url_source.py index 3a270255..e7ee6fd1 100644 --- a/argschema/sources/url_source.py +++ b/argschema/sources/url_source.py @@ -33,4 +33,5 @@ def get_dict(self): )) response = requests.get(url) + response.raise_for_status() return response.json() From d58f2ef85c60fb26113111a91e688a7f05bad0ca Mon Sep 17 00:00:00 2001 From: nilegraddis Date: Thu, 5 Mar 2020 15:35:00 -0800 Subject: [PATCH 83/86] update url_source test class to handle raise_for_status --- test/sources/test_url.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/sources/test_url.py b/test/sources/test_url.py index b57b0daa..5215476e 100644 --- a/test/sources/test_url.py +++ b/test/sources/test_url.py @@ -10,6 +10,10 @@ def __init__(self, json_data, status_code): self.json_data = json_data self.status_code = status_code + def raise_for_status(self): + if self.status_code >= 400: + raise requests.exceptions.HTTPError() + def json(self): return self.json_data From b02edc47afe0301cff0079c809f74f4f971d6d16 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 13 Apr 2020 09:05:52 -0700 Subject: [PATCH 84/86] initial readme change --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 2e3e3e60..e0e25143 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,10 @@ OR pass a json_dictionary directly into the module with the parameters defined AND/OR pass parameters via the command line, in a way that will override the input_json or the json_dictionary given. +## Upgrading to version 3.0 +The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ArgSource and ArgSink. + + ## Upgrading to version 2.0 The major change in argschema 2.0 is becoming compatible with marshmallow 3, which changes From bbfb96cbb58eb5496a618d99a9a664f1d1495a81 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 13 Apr 2020 09:25:14 -0700 Subject: [PATCH 85/86] edit readme --- README.md | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e0e25143..0fe6e292 100644 --- a/README.md +++ b/README.md @@ -14,17 +14,6 @@ OR pass a json_dictionary directly into the module with the parameters defined AND/OR pass parameters via the command line, in a way that will override the input_json or the json_dictionary given. -## Upgrading to version 3.0 -The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ArgSource and ArgSink. - - -## Upgrading to version 2.0 -The major change in argschema 2.0 is becoming -compatible with marshmallow 3, which changes -many of the ways your schemas and schema modifications work. 
Some noteable differences are that schemas are strict now by default, so tossing keys in your outputs or inputs that were ignored and stripped before now throw errors unless -def -Please read this document for more guidance -https://marshmallow.readthedocs.io/en/stable/upgrading.html ## Level of Support We are planning on occasional updating this tool with no fixed schedule. Community involvement is encouraged through both issues and pull requests. Please make pull requests against the dev branch, as we will test changes there before merging into master. @@ -79,7 +68,7 @@ You start building some code in an ipython notebook to play around with a new id It's a mess, and you know you should migrate your code over to a module that you can call from other programs or notebooks. You start collecting your input variables to the top of the notebook and make yourself a wrapper function that you can call. However, now your mistake in filename typing is a disaster because the file doesn't exist, and your code doesn't check for the existence of the file until quite late. You start implementing some input validation checks to avoid this problem. -Now you start wanting to integrate this code with other things, including elements that aren't in python. You decide that you need to have a command line module that executes the code, because then you can use other tools to stitch together your processing, like maybe some shell scripts or docker run commands. You implement an argparse set of inputs and default values that make your python program a self-contained program, with some help documentation. Along the way, you have to refactor the parsed argparse variables into your function and strip out your old hacky validation code to avoid maintaining two versions of validation in the future. +Now you start wanting to integrate this code with other things, including elements that aren't in python. You decide that you need to have a command line module that executes the code, because then you can use other tools to stitch together your processing, like maybe some shell scripts or docker run commands. You implement an argparse set of inputs and default values that make your python program a self-contained program, with some helpful documentation. Along the way, you have to refactor the parsed argparse variables into your function and strip out your old hacky validation code to avoid maintaining two versions of validation in the future. This module starts becoming useful enough that you want to integrate it into more complex modules. You end up copying and pasting various argparse lines over to other modules, and then 5 other modules. Later you decide to change your original module a little bit, and you have a nightmare of code replace to fix up the other modules to mirror this phenomenon.. you kick yourself for not having thought this through more clearly. @@ -89,5 +78,25 @@ If you had only designed things from the beginning to allow for each of these us This is what argschema is designed to do. + +## Upgrading to version 3.0 +The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ArgSource and ArgSink. One can define customized classes that read dictionaries from any source you can code, such as making a database call, reading from a web service, reading a yaml file, etc. Argschema isn't just for json anymore. 
Similarly you can now dynamically tell your ArgSchemaParser to write output to an Argsink, which might write to a database, a webservice, or a messaging service. This enables those integrating modules into larger workflow management solutions more flexibility in wiring up your python modules to those systems. + +It also removes features that were marked previously as deprecated. + +Notably, it removes support for parsing List arguments as --listarg a b c; lists must instead be passed as --listarg a,b,c. In other words, cli_as_single_argument = False is no longer an option. + +It also removes the old names JsonModule and ModuleParameters, which are now ArgSchemaParser and ArgSchema respectively. + +The field OptionList has been removed. The same functionality can be accomplished with the keyword validate=mm.validate.OneOf([a,b,c...]) in the field definition. + +## Upgrading to version 2.0 +The major change in argschema 2.0 is becoming +compatible with marshmallow 3, which changes +many of the ways your schemas and schema modifications work. Some notable differences are that schemas are strict now by default, so tossing keys in your outputs or inputs that were ignored and stripped before now throw errors. + +Please read this document for more guidance +https://marshmallow.readthedocs.io/en/stable/upgrading.html + Copyright 2017 Allen Institute From 113b6f6e26ac77652afbac0a49394c10ec6f6c2f Mon Sep 17 00:00:00 2001 From: Tom Chartrand Date: Tue, 6 Oct 2020 10:34:25 -0700 Subject: [PATCH 86/86] doc updates for ConfigurableSource/Sink --- README.md | 2 +- docs/user/intro.rst | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0fe6e292..6cb6ac2c 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ This is what argschema is designed to do. ## Upgrading to version 3.0 -The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ArgSource and ArgSink. One can define customized classes that read dictionaries from any source you can code, such as making a database call, reading from a web service, reading a yaml file, etc. Argschema isn't just for json anymore. Similarly you can now dynamically tell your ArgSchemaParser to write output to an Argsink, which might write to a database, a webservice, or a messaging service. This enables those integrating modules into larger workflow management solutions more flexibility in wiring up your python modules to those systems. +The major change in argschema 3.0 is introducing a more generalized interface for reading and writing dictionaries, referred to as ConfigurableSource and ConfigurableSink. One can define customized classes that read dictionaries from any source you can code, such as making a database call, reading from a web service, reading a yaml file, etc. Argschema isn't just for json anymore. Similarly you can now dynamically tell your ArgSchemaParser to write output to a ConfigurableSink, which might write to a database, a webservice, or a messaging service. This gives those integrating modules into larger workflow management solutions more flexibility in wiring up your python modules to those systems. It also removes features that were marked previously as deprecated. 
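To make the new interface concrete, here is a minimal sketch (not drawn from any diff above) of a custom input source written against the ConfigurableSource API this series introduces. The names EnvJsonSource, EnvJsonConfig, input_env_var, and MY_PARAMS are hypothetical, invented for this sketch; only ConfigurableSource, DefaultSchema, the argschema fields, ArgSchema, and ArgSchemaParser are names that appear in the patches, and the sketch shows how the API appears intended to be used rather than a definitive implementation.

```python
import json
import os

import argschema
from argschema.fields import Str
from argschema.schemas import DefaultSchema
from argschema.sources.source import ConfigurableSource


class EnvJsonConfig(DefaultSchema):
    # hypothetical config field: the name of an environment variable that
    # holds the input parameters as a JSON string
    input_env_var = Str(
        required=True,
        description="environment variable containing a JSON parameter string")


class EnvJsonSource(ConfigurableSource):
    """Hypothetical source that reads its input dictionary from the
    environment variable named by --input_env_var."""

    ConfigSchema = EnvJsonConfig

    def get_dict(self):
        # self.config is populated by load_config() once the parser finds
        # values satisfying ConfigSchema among its inputs
        return json.loads(os.environ[self.config["input_env_var"]])


class MySchema(argschema.ArgSchema):
    a = argschema.fields.Int(required=True, description="parameter a")


if __name__ == "__main__":
    os.environ["MY_PARAMS"] = json.dumps({"a": 42})
    parser = argschema.ArgSchemaParser(
        schema_type=MySchema,
        input_sources=[EnvJsonSource],
        args=["--input_env_var", "MY_PARAMS"],
    )
    print(parser.args["a"])  # -> 42
```

The same pattern extends to output: subclass ConfigurableSink and implement put_dict, as JsonSink and YamlSink do in the diffs above.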
diff --git a/docs/user/intro.rst b/docs/user/intro.rst index 87a342e9..f2bdeffa 100644 --- a/docs/user/intro.rst +++ b/docs/user/intro.rst @@ -205,30 +205,30 @@ For example, yaml is another reasonable choice for storing nested key-value stor :class:`argschema.argschema_parser.ArgSchemaYamlParser` demonstrates just that functionality. So now input_yaml and output_yaml can be specified instead. -Furthermore, you can pass an ArgSchemaParser an :class:`argschema.sources.ArgSource` object which +Furthermore, you can pass an ArgSchemaParser a :class:`argschema.sources.ConfigurableSource` object which implements a get_dict method, and any :class:`argschema.ArgSchemaParser` will get its input parameters from that dictionary. Importantly, this is true even when the original module author didn't explicitly support passing parameters from that mechanism, and the parameters will still be deserialized and validated in a uniform manner. -Similarly you can pass an :class:`argschema.sources.ArgSink` object which implements a put_dict method, +Similarly you can pass a :class:`argschema.sources.ConfigurableSink` object which implements a put_dict method, and :class:`argschema.ArgSchemaParser.output` will output the dictionary however that -:class:`argschema.sources.ArgSink` specifies it should. +:class:`argschema.sources.ConfigurableSink` specifies it should. -Finally, both :class:`argschema.sources.ArgSource` and :class:`argschema.sources.ArgSink` +Finally, both :class:`argschema.sources.ConfigurableSource` and :class:`argschema.sources.ConfigurableSink` have a property called ConfigSchema, which is a :class:`marshmallow.Schema` for how to deserialize the kwargs to it's init class. For example, the default :class:`argschema.sources.json_source.JsonSource` has one string field of 'input_json'. This is how :class:`argschema.ArgSchemaParser` is told what keys and values -should be read to initialize a :class:`argschema.sources.ArgSource` or - :class:`argschema.sources.ArgSink` instance. +should be read to initialize a :class:`argschema.sources.ConfigurableSource` or + :class:`argschema.sources.ConfigurableSink` instance. -So for example, if you wanted to define a :class:`argschema.sources.ArgSource` which loaded a dictionary +So for example, if you wanted to define a :class:`argschema.sources.ConfigurableSource` which loaded a dictionary from a particular host, port and url, and a module which had a command line interface for setting that host port and url you could do so like this. -.. literalinclude:: ../../test/sources/url_source.py +.. literalinclude:: ../../argschema/sources/url_source.py so now a UrlArgSchemaParser would expect command line flags of '--input_host' and '--input_url', and optionally '--input_port','--input_protocol' (or look for them in input_data) and will look to download