Skip to content

Commit b76d8c8

Browse files
[CX-2319]: Update impact analysis script
1 parent 98800a4 commit b76d8c8

File tree

3 files changed

+96
-26
lines changed

3 files changed

+96
-26
lines changed

analyze_impact.py

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010

1111

1212
def get_linked_columns(
13-
data_source_id: int,
14-
table_path: Tuple[str, ...],
15-
columns: List[str]):
16-
13+
data_source_id: int,
14+
table_path: Tuple[str, ...],
15+
columns: List[str],
16+
):
1717
# First we need to get uids of the table and its columns
1818
table_data = client.execute(
1919
load_query('queries/table_by_path.gql'),
@@ -49,26 +49,35 @@ def get_linked_columns(
4949
'allowedList': column_uids,
5050
})
5151

52-
columns_to_ignore = set(column_name_to_uid.values()) - set(column_uids)
5352
connected_columns = set()
5453
for edge in lineage_data['lineage']['edges']:
5554
if edge['sourceUid'] is None or edge['destinationUid'] is None:
56-
continue # this is an "off-chart" edge
55+
continue # this is an "off-chart" edge
5756

5857
connected_columns.add(edge['sourceUid'])
5958
connected_columns.add(edge['destinationUid'])
6059

6160
columns_per_table = defaultdict(set)
62-
for col in lineage_data['lineage']['entities']:
63-
uid = col.get('uid')
64-
if uid is None:
65-
continue # that's not a column
61+
for tabular_entity in lineage_data['lineage']['entities']:
62+
print(
63+
tabular_entity['__typename'],
64+
tabular_entity.get(
65+
'prop', {},
66+
).get('path') or tabular_entity.get('name')
67+
)
6668

67-
if uid not in connected_columns:
68-
continue
69+
for col in tabular_entity.get('columns', []):
70+
uid = col.get('uid')
71+
if uid is None:
72+
continue # that's not a column
6973

70-
table = col['table']['prop']['path']
71-
columns_per_table[table].add(col['prop']['name'])
74+
if uid not in connected_columns:
75+
continue
76+
77+
column_name = col.get('prop', {}).get('name') or col.get('name') or uid
78+
print(f' - {column_name}')
79+
if tags := col.get('tags', []):
80+
print(' Tags:', ', '.join([tag['name'] for tag in tags]))
7281

7382
return columns_per_table
7483

@@ -87,7 +96,7 @@ def main():
8796
table_path = tuple(args.full_table_name.split('.'))
8897

8998
try:
90-
columns_per_table = get_linked_columns(
99+
get_linked_columns(
91100
args.data_source_id,
92101
table_path,
93102
args.columns,
@@ -97,11 +106,5 @@ def main():
97106
print('Names are case sensitive')
98107
sys.exit(-1)
99108

100-
for table, column_set in sorted(columns_per_table.items()):
101-
for column in sorted(column_set):
102-
# that's a hacky and incorrect way to unquote
103-
unquoted_table = table.replace('"', '')
104-
print(unquoted_table, column)
105-
106109
if __name__ == '__main__':
107110
main()

common.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,25 @@
1+
from pathlib import Path
12
from typing import Iterable
23
import os
34
import sys
45

56
from gql import gql, Client
67
from gql.transport.aiohttp import AIOHTTPTransport
78

9+
env_file = Path(__file__).parent / '.env'
10+
if env_file.is_file():
11+
lines = env_file.read_text().splitlines()
12+
for line in lines:
13+
name, value = line.split('=')
14+
os.environ[name] = value
15+
816
token = os.environ.get('DATAFOLD_API_KEY')
917
if token is None:
10-
print('Please set DATAFOLD_API_KEY environment variable')
18+
print('Please set DATAFOLD_API_KEY environment variable.\n')
19+
print(
20+
'You can do that by writing it into `.env` file '
21+
'in the root directory of this project. It will be .gitignore-d.'
22+
)
1123
sys.exit(-1)
1224

1325
host = os.environ.get('DATAFOLD_HOST', 'https://app.datafold.com')

queries/lineage_analyze_impact.gql

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,69 @@ query GetLineageByUID(
1818
) {
1919

2020
entities {
21-
... on Column {
21+
__typename
22+
23+
...on Table {
2224
uid
2325
prop {
24-
name
26+
path
2527
}
26-
table {
28+
columns {
2729
prop {
28-
path
30+
dbType
31+
name
32+
type
33+
__typename
34+
}
35+
uid
36+
tags {
37+
name
38+
}
39+
}
40+
}
41+
42+
...on DataAppCommon {
43+
name
44+
}
45+
46+
...on DataAppLookerView {
47+
name
48+
columns {
49+
...on LookerViewDimension {
50+
uid
51+
name
52+
}
53+
...on LookerViewMeasure {
54+
uid
55+
name
56+
}
57+
}
58+
}
59+
60+
...on DataAppLookerExplore {
61+
name
62+
columns {
63+
...on LookerViewDimension {
64+
uid
65+
name
66+
}
67+
...on LookerViewMeasure {
68+
uid
69+
name
70+
}
71+
}
72+
}
73+
74+
...on DataAppLookerLook {
75+
name
76+
columns {
77+
...on LookerViewDimension {
78+
uid
79+
name
80+
}
81+
...on LookerViewMeasure {
82+
uid
83+
name
2984
}
3085
}
3186
}

0 commit comments

Comments
 (0)