metrics_utility/library/README.md
This is a Python library for metrics-utility. It provides an abstraction over collectors, packaging and storage, extraction, rollups, dataframes and reports, as well as helper functions for tempdirs, locking, and datetime handling.
### Abstractions
#### Collector
Files created by collectors are only cleaned up when the collector is called by Package; otherwise they rely on having been created inside a per-job tempdir (see helpers), which then gets cleaned up.
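The per-job tempdir pattern can be sketched like this (a minimal illustration with a made-up `collect_hosts` collector, not the library's actual API):

```python
import csv
import tempfile
from pathlib import Path

def collect_hosts(job_dir: Path) -> Path:
    """Hypothetical collector: writes its output into the per-job tempdir
    and leaves cleanup to whoever owns that directory."""
    out = job_dir / "hosts.csv"
    with out.open("w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["host_name", "job_count"])
        writer.writerow(["node1.example.com", 3])
    return out

# The per-job tempdir owns the lifetime of every collector output file.
with tempfile.TemporaryDirectory(prefix="metrics-job-") as tmp:
    path = collect_hosts(Path(tmp))
    collected = path.exists()
# Leaving the context deletes the tempdir and everything the collectors wrote.
```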
#### Package
When multiple collectors are called, or the same collector is called multiple times, they are independent of each other.
For grouping things together, we have a Package class, which takes a list of collectors and packages their output into a tarball. Such a tarball can then be passed to a Storage class, and gets cleaned up afterwards.
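A rough sketch of the packaging step, using a hypothetical `package` helper rather than the library's actual Package class:

```python
import tarfile
import tempfile
from pathlib import Path

def package(files, tarball_path):
    """Bundle independent collector output files into one gzipped tarball,
    then clean up the inputs (mirroring the behavior described above)."""
    with tarfile.open(tarball_path, "w:gz") as tar:
        for f in files:
            tar.add(f, arcname=Path(f).name)
    for f in files:
        Path(f).unlink()
    return tarball_path

with tempfile.TemporaryDirectory() as tmp:
    a = Path(tmp) / "config.json"
    b = Path(tmp) / "hosts.csv"
    a.write_text("{}")
    b.write_text("host_name\n")
    tarball = package([a, b], Path(tmp) / "bundle.tar.gz")
    with tarfile.open(tarball) as tar:
        names = sorted(tar.getnames())
print(names)  # ['config.json', 'hosts.csv']
```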
#### Storage
Storage objects serve to provide a shared interface for various storage modes. Each can be initialized with an appropriate configuration, and can retrieve or save objects from/to long-term storage.
Mainly S3 and local directories are supported, but the Storage mechanism can also be used to push the data to cloud APIs or to save it in a local DB.
Common API:

* `storage.put(name, ...)` - should upload to storage, and retry/raise on failure.
* `storage.put(name, dict=data)` - uploads a dict, likely as json data, or a .json file
* `storage.put(name, filename=path)` - uploads a local file (by name)
* `storage.put(name, fileobj=handle)` - uploads an opened local file or a compatible object (by a file-like handle)
* `storage.get(name)` - (context manager) should download from storage into a temporary file, yield the temporary filename, and remove the file again.

Also supported - `exists(name) -> Bool`, `remove(name)`, `glob(pattern) -> [filenames]`.
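To make the contract above concrete, here is a toy directory-backed storage honoring it (a simplified illustration, not the library's actual StorageDirectory code):

```python
import json
import shutil
import tempfile
from contextlib import contextmanager
from pathlib import Path

class ToyDirectoryStorage:
    """Minimal storage implementing the put/get/exists/remove/glob contract."""

    def __init__(self, base_path="./"):
        self.base = Path(base_path)
        self.base.mkdir(parents=True, exist_ok=True)

    def put(self, name, dict=None, filename=None, fileobj=None):
        dest = self.base / name
        dest.parent.mkdir(parents=True, exist_ok=True)
        if dict is not None:
            dest.write_text(json.dumps(dict))  # dict -> json file
        elif filename is not None:
            shutil.copy(filename, dest)  # local file by name
        elif fileobj is not None:
            with dest.open("wb") as out:
                shutil.copyfileobj(fileobj, out)  # file-like handle
        else:
            raise ValueError("need dict=, filename= or fileobj=")

    @contextmanager
    def get(self, name):
        # Yield a temporary copy, remove it when the caller is done.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            pass
        shutil.copy(self.base / name, tmp.name)
        try:
            yield tmp.name
        finally:
            Path(tmp.name).unlink()

    def exists(self, name):
        return (self.base / name).exists()

    def remove(self, name):
        (self.base / name).unlink()

    def glob(self, pattern):
        return sorted(str(p.relative_to(self.base)) for p in self.base.glob(pattern))

store = ToyDirectoryStorage(tempfile.mkdtemp())
store.put("report.json", dict={"hosts": 2})
with store.get("report.json") as path:
    data = json.loads(Path(path).read_text())
print(data)  # {'hosts': 2}
```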
Implemented storage classes:

```
# StorageDirectory - local directory structure under base_path
#
# base_path = METRICS_UTILITY_SHIP_PATH

StorageDirectory(
    base_path='./',
)
```

```
# StorageS3 - S3 or minio
#
# bucket = METRICS_UTILITY_BUCKET_NAME
# endpoint = METRICS_UTILITY_BUCKET_ENDPOINT
# region = METRICS_UTILITY_BUCKET_REGION
# access_key = METRICS_UTILITY_BUCKET_ACCESS_KEY
# secret_key = METRICS_UTILITY_BUCKET_SECRET_KEY

StorageS3(
    bucket='name',
    endpoint='http://localhost:9000',  # or 'https://s3.us-east.example.com'
    ...
)
```
#### Extractors
The opposite of `Package`, an extractor can take a set of files (obtained from storage.get), and read a set of dataframes from them, optionally filtered to select a subset of dataframes to load.
The returned dataframes are raw, but compatible with the `add_*` methods of our named Dataframe classes.
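The extraction step, in miniature (plain CSV parsing instead of the library's Extractor classes; names are illustrative):

```python
import csv
import io
import tarfile
import tempfile
from pathlib import Path

def extract_tables(tarball_path, only=None):
    """Read the CSV members of a collected tarball into {name: rows},
    optionally filtered to a subset of table names."""
    tables = {}
    with tarfile.open(tarball_path) as tar:
        for member in tar.getmembers():
            if not member.name.endswith(".csv"):
                continue
            name = member.name[:-len(".csv")]
            if only is not None and name not in only:
                continue
            data = tar.extractfile(member).read().decode()
            tables[name] = list(csv.DictReader(io.StringIO(data)))
    return tables

with tempfile.TemporaryDirectory() as tmp:
    src = Path(tmp) / "hosts.csv"
    src.write_text("host_name,job_count\nnode1,3\n")
    tarball = Path(tmp) / "bundle.tar.gz"
    with tarfile.open(tarball, "w:gz") as tar:
        tar.add(src, arcname="hosts.csv")
    tables = extract_tables(tarball, only={"hosts"})
print(tables)  # {'hosts': [{'host_name': 'node1', 'job_count': '3'}]}
```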
#### Dataframes
A pandas dataframe object with extras - a dataframe always knows about its fields and indexes even when empty, and has a `to_csv` / `to_parquet` / `to_json` set of methods to convert to storable formats.
A rollup is the process of building a dataframe from raw csv files, and saving the grouped/aggregated result back into a parquet file.
#### Reports
Reports are predefined classes which take a set of dataframes, along with additional config, and create an XLSX file with a specific report. ReportCCSP, ReportCCSPv2 and ReportRenewalGuidance are implemented.