Skip to content

Commit 2592216

Browse files
authored
Merge pull request #114 from dlt-hub/feat/detector_improvements
detector improvements, add contributing.md, small rendering fixes
2 parents 5556248 + f1d2c65 commit 2592216

27 files changed

+175416
-58
lines changed

CONTRIBUTING.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Contributing to dlt-init-openapi
2+
3+
To contribute to this repo, you can do the following:
4+
5+
1. [Join our Slack community](https://dlthub.com/community) and talk to us if you want to extend dlt-init-openapi. Until we have a more comprehensive contribution guide, we're happy to help you get started there.
6+
2. Fork this repo and check it out
7+
3. Install all dependencies with `make dev` (you will need poetry for dependency management)
8+
4. Run the fast tests to verify that all is properly installed with `make test-fast`
9+
5. Make your code changes, and write new tests if you add new features.
10+
6. Format and lint with `make format` and `make lint`
11+
7. Create a PR to this repo.

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,11 @@ $ dlt-init-openapi pokemon --path ./path/to/my_spec.yml
115115
- `--output-path PATH`: A path to render the output to
116116
- `--config PATH`: Path to the config file to use (see below)
117117
- `--no-interactive`: Skip endpoint selection and render all paths of the OpenAPI spec.
118-
- `--loglevel`: Set logging level for stdout output, defaults to 20 (INFO).
118+
- `--log-level`: Set logging level for stdout output, defaults to 20 (INFO).
119+
- `--global-limit`: Set a global limit on the generated source.
120+
- `--update-rest-api-source`: Update the locally cached rest_api verified source.
121+
- `--allow-openapi-2`: Allow the use of OpenAPI v2 specs. Migrating the spec to 3.0 is recommended, though.
122+
- `--version`: Show installed version of the generator.
119123
- `--help`: Show this message and exit.
120124

121125
## Config options

dlt_init_openapi/__init__.py

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from enum import Enum
44
from importlib.metadata import version
55
from pathlib import Path
6-
from typing import Optional, cast
6+
from typing import cast
77

88
import httpcore
99
import httpx
@@ -57,7 +57,10 @@ def render(self, dry: bool = False) -> None:
5757
logger.info("Rendering project")
5858
if self.config.endpoint_filter:
5959
filtered_endpoints = self.config.endpoint_filter(self.openapi.endpoints)
60-
self.openapi.endpoints.set_ids_to_render(filtered_endpoints)
60+
if filtered_endpoints:
61+
self.openapi.endpoints.set_ids_to_render(filtered_endpoints)
62+
else:
63+
logger.warning("You have not selected any endpoints, all endpoints will be rendered.")
6164
self.renderer.run(self.openapi, dry=dry)
6265
logger.success(f"Rendered project to: {self.config.project_dir}")
6366
logger.info("You can now run your pipeline from this folder with 'python pipeline.py'.")
@@ -84,30 +87,28 @@ def print_warnings(self) -> None:
8487
logger.warning(w.msg)
8588

8689

87-
def _get_document(*, url: Optional[str] = None, path: Optional[Path] = None, timeout: int = 60) -> bytes:
88-
if url is not None and path is not None:
90+
def _get_document(*, config: Config, timeout: int = 60) -> bytes:
91+
if config.spec_url is not None and config.spec_path is not None:
8992
raise ValueError("Provide URL or Path, not both.")
90-
if url is not None:
91-
logger.info(f"Downloading spec from {url}")
93+
if config.spec_url is not None:
94+
logger.info(f"Downloading spec from {config.spec_url}")
9295
try:
93-
response = httpx.get(url, timeout=timeout)
96+
response = httpx.get(config.spec_url, timeout=timeout)
9497
logger.success("Download complete")
9598
return response.content
9699
except (httpx.HTTPError, httpcore.NetworkError) as e:
97100
raise ValueError("Could not get OpenAPI document from provided URL") from e
98-
elif path is not None:
99-
logger.info(f"Reading spec from {path}")
100-
return Path(path).read_bytes()
101+
elif config.spec_path is not None:
102+
logger.info(f"Reading spec from {config.spec_path}")
103+
return Path(config.spec_path).read_bytes()
101104
else:
102105
raise ValueError("No URL or Path provided")
103106

104107

105108
def _get_project_for_url_or_path( # pylint: disable=too-many-arguments
106-
url: Optional[str],
107-
path: Optional[Path],
108109
config: Config = None,
109110
) -> Project:
110-
doc = _get_document(url=url, path=path)
111+
doc = _get_document(config=config)
111112

112113
renderer_cls = cast(BaseRenderer, import_class_from_string(config.renderer_class))
113114
detector_cls = cast(BaseDetector, import_class_from_string(config.detector_class))
@@ -123,8 +124,6 @@ def _get_project_for_url_or_path( # pylint: disable=too-many-arguments
123124

124125
def create_new_client(
125126
*,
126-
url: Optional[str] = None,
127-
path: Optional[Path] = None,
128127
config: Config = None,
129128
) -> Project:
130129
"""
@@ -134,8 +133,6 @@ def create_new_client(
134133
The project.
135134
"""
136135
project = _get_project_for_url_or_path(
137-
url=url,
138-
path=path,
139136
config=config,
140137
)
141138
project.parse()

dlt_init_openapi/cli/__init__.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,14 @@ def init(
4343
output_path: Optional[pathlib.Path] = typer.Option(None, help="A path to render the output to."),
4444
config_path: Optional[pathlib.Path] = typer.Option(None, "--config", help="Path to the config file to use"),
4545
interactive: bool = typer.Option(True, help="Wether to select needed endpoints interactively"),
46-
loglevel: int = typer.Option(20, help="Set logging level for stdout output, defaults to 20 (INFO)"),
46+
log_level: int = typer.Option(20, help="Set logging level for stdout output, defaults to 20 (INFO)"),
4747
global_limit: int = typer.Option(0, help="Set a global limit on the generated source"),
48-
update_rest_api_source: bool = typer.Option(
49-
False, help="Wether to update the locally cached rest_api verified source"
48+
allow_openapi_2: bool = typer.Option(
49+
False,
50+
"--allow-openapi-2",
51+
help="Allow to use OpenAPI v2. specs. Migration of the spec to 3.0 is recommended though.",
5052
),
53+
update_rest_api_source: bool = typer.Option(False, help="Update the locally cached rest_api verified source."),
5154
version: bool = typer.Option(False, "--version", callback=_print_version, help="Print the version and exit"),
5255
) -> None:
5356
"""Generate a new dlt pipeline"""
@@ -59,9 +62,10 @@ def init(
5962
output_path=output_path,
6063
config_path=config_path,
6164
interactive=interactive,
62-
loglevel=loglevel,
65+
log_level=log_level,
6366
global_limit=global_limit,
6467
update_rest_api_source=update_rest_api_source,
68+
allow_openapi_2=allow_openapi_2,
6569
)
6670

6771

@@ -73,16 +77,17 @@ def _init_command_wrapped(
7377
output_path: Optional[pathlib.Path] = None,
7478
config_path: Optional[pathlib.Path] = None,
7579
interactive: bool = True,
76-
loglevel: int = 20,
80+
log_level: int = 20,
7781
global_limit: int = 0,
7882
update_rest_api_source: bool = False,
83+
allow_openapi_2: bool = False,
7984
) -> None:
8085

8186
from dlt_init_openapi import create_new_client
8287

8388
# set up console logging
8489
logger.remove()
85-
logger.add(sys.stdout, level=loglevel)
90+
logger.add(sys.stdout, level=log_level)
8691
logger.success("Starting dlt openapi generator")
8792

8893
if not url and not path:
@@ -105,6 +110,9 @@ def _init_command_wrapped(
105110
"output_path": output_path,
106111
"endpoint_filter": questionary_endpoint_selection if interactive else None,
107112
"global_limit": global_limit,
113+
"spec_url": url,
114+
"spec_path": path,
115+
"allow_openapi_2": allow_openapi_2,
108116
},
109117
)
110118

@@ -117,8 +125,6 @@ def _init_command_wrapped(
117125
exit(0)
118126

119127
create_new_client(
120-
url=url,
121-
path=path,
122128
config=config,
123129
)
124130
logger.success("Pipeline created. Learn more at https://dlthub.com/docs. See you next time :)")

dlt_init_openapi/cli/cli_endpoint_selection.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ def questionary_endpoint_selection(endpoints: EndpointCollection) -> Set[str]:
1818
("italic", f" {endpoint.path}"),
1919
]
2020
choices.append(questionary.Choice(text, endpoint))
21-
if not choices:
22-
raise ValueError("No endpoints found")
2321
selected_endpoints: List[Endpoint] = questionary.checkbox(
24-
"Which resources would you like to generate?", choices
22+
"Which resources would you like to generate? Press enter to continue, "
23+
+ "if you do not select any resources, all of them will be rendered.",
24+
choices,
2525
).ask()
2626

2727
# return resource names of selected endpoints

dlt_init_openapi/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,14 @@ class Config(BaseModel):
5151
"""Set a limit on how many items are emitted from a resource"""
5252
parameter_default_value: str = "FILL_ME_IN"
5353
"""default to render for required parameters that do not have a default in the spec"""
54+
allow_openapi_2: bool = False
55+
"""Allow to use OpenAPI 2 specs"""
5456

5557
# internal, do not set via config file
5658
project_dir: Path = None
5759
pipeline_file_name: str = None
60+
spec_url: str = None
61+
spec_path: Path = None
5862

5963
def __init__(self, *args: Any, **kwargs: Any) -> None:
6064
super(Config, self).__init__(*args, **kwargs)

dlt_init_openapi/detector/default/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ def detect_security_schemes(self, open_api: OpenapiParser) -> None:
126126
elif global_scheme and not global_scheme.supported:
127127
self._add_warning(UnsupportedSecuritySchemeWarning(global_scheme.type))
128128

129+
# set first auth as global scheme
130+
if open_api.security_schemes and not open_api.detected_global_security_scheme:
131+
global_scheme = list(open_api.security_schemes.values())[0]
132+
if global_scheme.supported:
133+
open_api.detected_global_security_scheme = global_scheme
134+
129135
def detect_resource_names(self, endpoints: EndpointCollection) -> None:
130136
"""iterate all endpoints and find a strategy to select the right resource name"""
131137

dlt_init_openapi/detector/default/const.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@
1111
PRIMARY_KEY_SUFFIXES = ["id", "pk"]
1212
PRIMARY_KEY_WORD_SEPARATORS = ["", "-", "_"]
1313

14-
RE_UNIQUE_KEY = re.compile(r"\b(unique|id|identifier)\b", re.IGNORECASE)
14+
RE_UNIQUE_KEY = re.compile(r"^(unique|id|identifier)$", re.IGNORECASE)
1515

1616
# pagination
17-
RE_PAGE_PARAM = re.compile(r"(?i)(page|page_number)", re.IGNORECASE)
18-
RE_TOTAL_PAGE_PROPERTY = re.compile(r"(?i)(total|count)", re.IGNORECASE)
19-
RE_OFFSET_PARAM = re.compile(r"(?i)(start|offset|skip)", re.IGNORECASE)
20-
RE_LIMIT_PARAM = re.compile(r"(?i)(limit|per_page|page_size|size)", re.IGNORECASE)
21-
RE_TOTAL_PROPERTY = re.compile(r"(?i)(total|count|total_count)", re.IGNORECASE)
22-
RE_CURSOR_PARAM = re.compile(r"(?i)(cursor|after|since)", re.IGNORECASE)
23-
RE_CURSOR_PROP = re.compile(r"(?i)(cursor|next_cursor)", re.IGNORECASE)
24-
RE_NEXT_PROPERTY = re.compile(r"(?i)(next|next_url|more)", re.IGNORECASE)
17+
RE_PAGE_PARAM = re.compile(r"^(page|page_number)$", re.IGNORECASE)
18+
RE_TOTAL_PAGE_PROPERTY = re.compile(r"^(total|count|totalPages)$", re.IGNORECASE)
19+
RE_OFFSET_PARAM = re.compile(r"^(start|offset|skip)$", re.IGNORECASE)
20+
RE_LIMIT_PARAM = re.compile(r"^(limit|per_page|page_size|size)$", re.IGNORECASE)
21+
RE_TOTAL_PROPERTY = re.compile(r"^(total|count|total_count|totalRecords|totalItems)$", re.IGNORECASE)
22+
RE_CURSOR_PARAM = re.compile(r"^(cursor|after|since)$", re.IGNORECASE)
23+
RE_CURSOR_PROP = re.compile(r"^(cursor|next_cursor)$", re.IGNORECASE)
24+
RE_NEXT_PROPERTY = re.compile(r"^(next|next_url|more)$", re.IGNORECASE)
2525
RE_MATCH_ALL = re.compile(r".*", re.IGNORECASE)
2626

2727
# content path discovery

dlt_init_openapi/exceptions.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from typing import List
2+
3+
14
class DltOpenAPIException(Exception):
25
pass
36

@@ -16,7 +19,9 @@ def __init__(self, swagger_detected: bool = False) -> None:
1619
convert_helper = (
1720
"you can convert it to an openapi 3.0 spec by going to https://editor.swagger.io/, "
1821
+ "pasting your spec and selecting 'Edit' -> 'Convert to OpenAPI 3.0' from the Menu "
19-
+ "and then retry with the converted file."
22+
+ "and then retry with the converted file. Alternatively you can run the generator "
23+
+ "with the --allow-openapi-2 flag. The generated result usually improves if you convert "
24+
+ "your spec to 3.0 thouhg."
2025
)
2126

2227
super().__init__(
@@ -36,3 +41,11 @@ class DltUnparseableSpecException(DltOpenAPITerminalException):
3641
def __init__(self) -> None:
3742

3843
super().__init__("Could not parse selected spec, please provide a valid YAML or JSON document.")
44+
45+
46+
class DltNoEndpointsDiscovered(DltOpenAPITerminalException):
47+
def __init__(self, enabled_methods: List[str]):
48+
super().__init__(
49+
f"Did not find any endpoint with http methods {enabled_methods} in provided OpenAPI spec. "
50+
+ "Please check your spec if endpoints with these methods exist or add additional methods in your config."
51+
)

dlt_init_openapi/parser/endpoints.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class Response:
3030
osp_response: osp.Response
3131
schema: Optional[SchemaWrapper]
3232
status_code: str
33+
description: str
3334
# detected values
3435
detected_payload: Optional[DataPropertyPath] = None
3536
detected_primary_key: Optional[str] = None
@@ -144,6 +145,13 @@ def default_for_param(self, location: Literal["path", "query"], param_name: str)
144145
return p.default
145146
return self.context.config.parameter_default_value
146147

148+
@property
149+
def render_description(self) -> Optional[str]:
150+
description = self.description or self.path_description
151+
if not description:
152+
return None
153+
return description.replace("\n", " ")
154+
147155
@classmethod
148156
def from_operation(
149157
cls,
@@ -168,11 +176,18 @@ def from_operation(
168176
response_schema = context.response_from_reference(response_ref)
169177
content_schema: Optional[SchemaWrapper] = None
170178
for content_type, media_type in (response_schema.content or {}).items():
171-
if content_type.endswith("json") and media_type.media_type_schema:
179+
if (content_type.endswith("json") or content_type == "*/*") and media_type.media_type_schema:
172180
content_schema = SchemaWrapper.from_reference(media_type.media_type_schema, context)
173181
break
174182

175-
responses.append(Response(osp_response=response_schema, schema=content_schema, status_code=status_code))
183+
responses.append(
184+
Response(
185+
osp_response=response_schema,
186+
schema=content_schema,
187+
status_code=status_code,
188+
description=response_schema.description,
189+
)
190+
)
176191

177192
return cls(
178193
method=method,

0 commit comments

Comments
 (0)