From 569f164a296987c57e7285348e9b12b708deb1e0 Mon Sep 17 00:00:00 2001 From: "algodeveloper1@free.fr" <algodeveloper1@free.fr> Date: Fri, 4 Mar 2022 12:54:47 +0000 Subject: [PATCH] s3-download ingest notebook BIOMASS-2371 --- edav/user-data-ingest-from-module.ipynb | 151 ++++++++++++++++ .../edav_form-checkpoint.py | 171 ++++++++++++++++++ .../edav_ingest-checkpoint.py | 48 +++++ .../edav_raster_utils-checkpoint.py | 41 +++++ .../edav_s3_utils-checkpoint.py | 39 ++++ .../ingest-user-product-checkpoint.py | 65 +++++++ .../__pycache__/edav_form.cpython-37.pyc | Bin 0 -> 4209 bytes .../__pycache__/edav_ingest.cpython-37.pyc | Bin 0 -> 1147 bytes .../edav_raster_utils.cpython-37.pyc | Bin 0 -> 1037 bytes .../__pycache__/edav_s3_utils.cpython-37.pyc | Bin 0 -> 1193 bytes edav/user-data-ingest-scripts/edav_form.py | 171 ++++++++++++++++++ edav/user-data-ingest-scripts/edav_ingest.py | 48 +++++ .../edav_raster_utils.py | 41 +++++ .../user-data-ingest-scripts/edav_s3_utils.py | 39 ++++ .../ingest-user-product.py | 65 +++++++ init.sh | 7 + 16 files changed, 886 insertions(+) create mode 100644 edav/user-data-ingest-from-module.ipynb create mode 100644 edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_form-checkpoint.py create mode 100644 edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_ingest-checkpoint.py create mode 100644 edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_raster_utils-checkpoint.py create mode 100644 edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_s3_utils-checkpoint.py create mode 100644 edav/user-data-ingest-scripts/.ipynb_checkpoints/ingest-user-product-checkpoint.py create mode 100644 edav/user-data-ingest-scripts/__pycache__/edav_form.cpython-37.pyc create mode 100644 edav/user-data-ingest-scripts/__pycache__/edav_ingest.cpython-37.pyc create mode 100644 edav/user-data-ingest-scripts/__pycache__/edav_raster_utils.cpython-37.pyc create mode 100644 edav/user-data-ingest-scripts/__pycache__/edav_s3_utils.cpython-37.pyc create mode 
100644 edav/user-data-ingest-scripts/edav_form.py create mode 100644 edav/user-data-ingest-scripts/edav_ingest.py create mode 100644 edav/user-data-ingest-scripts/edav_raster_utils.py create mode 100644 edav/user-data-ingest-scripts/edav_s3_utils.py create mode 100644 edav/user-data-ingest-scripts/ingest-user-product.py diff --git a/edav/user-data-ingest-from-module.ipynb b/edav/user-data-ingest-from-module.ipynb new file mode 100644 index 0000000..da11d4a --- /dev/null +++ b/edav/user-data-ingest-from-module.ipynb @@ -0,0 +1,151 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4de51eb6", + "metadata": {}, + "source": [ + "<h1>Sample user data ingestion notebook</h1>" + ] + }, + { + "cell_type": "markdown", + "id": "8b45f9ee", + "metadata": {}, + "source": [ + "As a first step add the ingestion module location to path. In the future the ingestion module could be automatically put in the python module path." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5a994d73", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "sys.path.insert(0, os.path.abspath('./user-data-ingest-scripts'))" + ] + }, + { + "cell_type": "markdown", + "id": "11cb449a", + "metadata": {}, + "source": [ + "Then import the product ingestion utils from the edav_ingest module. The module uses the configuration information set in `/projects/.maap/edav.ini`.\n", + "\n", + "It depends on rasterio and ipywidgets libraries that are currently not available in the conda environment. 
To prepare the environment the following commands were run before importing the script:\n", + "\n", + "`conda install --freeze-installed rasterio`\n", + "\n", + "`conda install --freeze-installed ipywidgets`\n", + "\n", + "`jupyter labextension install @jupyter-widgets/jupyterlab-manager`" + ] + }, + { + "cell_type": "markdown", + "id": "3987a8cd", + "metadata": {}, + "source": [ + "<h3>Local file ingestion</h3>\n", + "\n", + "First we run the local ingestion script passing the location of a local product file. This will generate a form. Complete the missing information and submit it to complete the ingestion. The function uses the `maap-s3.py` script internally to upload the product to S3 (in `[user_data_remote_s3_path]/[user_data_upload_location]/[USER_EMAIL]/` as specified in `/projects/.maap/edav.ini` e.g. `maap-scientific-data/shared/edav/edav_esa-maap.org`). Be sure it is available and that the information in `/projects/.maap/auth.ini` are correct" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a089ae96", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b97de56bbeea4429b6e7caf1a2739756", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(Text(value='georef_UK_classif', description='Dataset ID*:', layout=Layout(width=…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from edav_form import generateIngestionFormForLocalProduct\n", + "\n", + "generateIngestionFormForLocalProduct('/projects/data/georef_UK_classif.tif')" + ] + }, + { + "cell_type": "markdown", + "id": "bb1cbec4", + "metadata": {}, + "source": [ + "<h3>Remote file ingestion</h3>\n", + "\n", + "To ingest a product already on S3 user data location, run the remote ingestion script passing the relative location of the product (relative to the `user_data_local_s3_mount` set in `/projects/.maap/edav.ini`). 
The assumption is that the S3 location is locally mounted in the jupyter env (e.g. in /project/s3-drive/user-data, configurable in `/projects/.maap/edav.ini`) in order to extract the required information from the product metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "13f4e740", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5f1d32eb92124389907392e2f02baf15", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(Text(value='georef_Ecosse_classif_test', description='Dataset ID*:', layout=Layo…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from edav_form import generateIngestionFormForRemoteProduct\n", + "\n", + "generateIngestionFormForRemoteProduct('GPU_TEST_TIF/georef_Ecosse_classif_test.tiff')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87872c47", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Maap", + "language": "python", + "name": "maap" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_form-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_form-checkpoint.py new file mode 100644 index 0000000..9129c22 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_form-checkpoint.py @@ -0,0 +1,171 @@ +import configparser +import datetime +import ipywidgets as widgets +from IPython.display import display, Markdown, HTML + +from edav_raster_utils import extractRasterMetadataFromProduct +from edav_ingest import uploadLocalProductAndIngest, 
ingestProduct + +def generateIngestionForm(meta, on_submit): + productDate = datetime.datetime.utcnow().replace(minute=0, hour=0) + + if 'productDate' in meta: + try: + productDate = datetime.datetime.strptime(meta['productDate'], '%Y-%m-%dT%H:%M:%SZ') + except BaseException: + pass + + fieldLayout = widgets.Layout(width='50%') + + datasetIdField = widgets.Text( + value=(meta['datasetId'] if 'datasetId' in meta else ''), + placeholder='Set the dataset id', + description='Dataset ID*:', + disabled=False, + layout=fieldLayout + ) + titleField = widgets.Text( + value=(meta['title'] if 'title' in meta else ''), + placeholder='Set the dataset title', + description='Title*:', + disabled=False, + layout=fieldLayout + ) + descriptionField = widgets.Textarea( + value=(meta['description'] if 'description' in meta else ''), + placeholder='Insert a dataset description', + description='Description*:', + disabled=False, + layout=fieldLayout + ) + geolocatedField = widgets.Valid( + value=(meta['geolocated'] if 'geolocated' in meta else False), + description='Geolocated', + disabled=True, + readout='' + ) + geometryField = widgets.Textarea( + value=(meta['geometry'] if 'geometry' in meta else ''), + description='Geometry*:', + placeholder='Insert a GeoJSON polygon', + disabled=True, + rows=5, + layout=fieldLayout + ) + dateField = widgets.DatePicker( + value=productDate, + description='Product date*:', + disabled=False + ) + hourField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', range(0, 24))), + value=productDate.strftime('%H'), + disabled=False, + layout=widgets.Layout(width='50px') + ) + minuteField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', range(0, 60))), + value=productDate.strftime('%M'), + disabled=False, + layout=widgets.Layout(width='50px') + ) + submitButton = widgets.Button( + description='Submit', + disabled=False, + button_style='info', # 'success', 'info', 'warning', 'danger' or '' + layout=widgets.Layout(width='50%', margin='20px 0 0 
0') + ) + + if geolocatedField.value is False: + geometryField.disabled=False + + output = widgets.Output() + + def set_error_message(message): + display(Markdown("<span style='color: red'>Error: {}</span>".format(message))) + + def on_form_submit(b): + with output: + output.clear_output() + if not datasetIdField.value: + set_error_message('Dataset ID is required') + return + if not titleField.value: + set_error_message('Title is required') + return + if not descriptionField.value: + set_error_message('Description is required') + return + if not dateField.value: + set_error_message('Product date is required') + return + if not geometryField.value: + set_error_message('Geometry is required') + return + + submitButton.disabled = True + submitButton.icon = 'spinner' + submitButton.button_style = '' + + meta['datasetId'] = datasetIdField.value + meta['subDatasetId'] = datasetIdField.value + meta['title'] = titleField.value + if (descriptionField.value): + meta['dataset_description'] = descriptionField.value + else: + del meta['dataset_description'] + meta['productDate'] = "{}T{}:{}:00Z".format(dateField.value.strftime('%Y-%m-%d'), hourField.value, minuteField.value) + + result = on_submit(meta) + + submitButton.disabled = False + submitButton.icon = '' + submitButton.button_style = 'info' + + if result['status'] is 'error': + set_error_message(result['message']) + display(HTML(result['response'].text )) + else: + display(result['message']) + display(result['response'].json()) + + + submitButton.on_click(on_form_submit) + + display(widgets.VBox([ + widgets.HBox([datasetIdField]), + widgets.HBox([titleField]), + widgets.HBox([descriptionField]), + widgets.HBox([geolocatedField]), + widgets.HBox([geometryField]), + widgets.HBox([dateField, hourField, minuteField]), + widgets.HBox([submitButton]), + widgets.HBox([output]) + ])) + + + +def generateIngestionFormForLocalProduct(productLocation): + productMetadata = extractRasterMetadataFromProduct(productLocation) + 
print(productMetadata) + generateIngestionForm(productMetadata, uploadLocalProductAndIngest) + + +def generateIngestionFormForRemoteProduct(productLocation): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + sourceBasePath = config['edav']['user_data_local_s3_mount'] + targetBasePath = config['edav']['user_data_remote_s3_path'] + + localFilePath = "{}/{}".format(sourceBasePath, productLocation) + remoteFilePath = "{}/{}".format(targetBasePath, productLocation) + + meta = extractRasterMetadataFromProduct(localFilePath) + + + meta['source'] = "/vsis3/{}".format(remoteFilePath) + generateIngestionForm(meta, ingestProduct) + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_ingest-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_ingest-checkpoint.py new file mode 100644 index 0000000..b0735a1 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_ingest-checkpoint.py @@ -0,0 +1,48 @@ +import os +import configparser +import json +import requests + +from edav_s3_utils import uploadLocalFileToS3 + +def ingestProduct(productMetadata): + + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + ingestionApiUrl = config['edav']['user_data_ingestion_url'] + + data = {} + data[productMetadata['datasetId']] = productMetadata + + r = requests.post( ingestionApiUrl, data={"metadata": json.dumps(data)}) + if r.status_code != 200: + return { + "status": "error", + "message": "Product ingestion failed", + "response": r + } + else: + return { + "status": "success", + "message": "Product succesfully ingested", + "response": r + } + + +def uploadLocalProductAndIngest(productMetadata): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + targetBasePath = config['edav']['user_data_remote_s3_path'] + + remoteLocation = uploadLocalFileToS3(productMetadata['source']) + print(remoteLocation) + 
productMetadata['source'] = "/vsis3/{}".format(remoteLocation) + + return ingestProduct(productMetadata) + + + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_raster_utils-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_raster_utils-checkpoint.py new file mode 100644 index 0000000..a6b49e3 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_raster_utils-checkpoint.py @@ -0,0 +1,41 @@ +import os +import rasterio +from rasterio import warp +import json + +def extractRasterMetadataFromProduct(productPath): + with rasterio.open(productPath, 'r') as product: + meta = { + "source": productPath, + "datasetId": os.path.splitext(os.path.basename(productPath))[0], + "single_multiband": "{}".format(product.count), + "grid": False, + "gridType":"Custom", + "dataset_type": "Raster", + "dataset_dimension":"3", + "dataset_dimension_description":"Lat Long Time", + "defaultViewMode":["band1"] + } + if product.crs: + meta['geolocated'] = True + + bbox = product.bounds + geometry = g = warp.transform_geom( + product.crs, + {'init':'EPSG:4326'}, + { + 'type': 'Polygon', + 'coordinates': [[ + [bbox.left, bbox.bottom], + [bbox.right, bbox.bottom], + [bbox.right, bbox.top], + [bbox.left, bbox.top], + [bbox.left, bbox.bottom] + ]] + } + ) + meta['geometry'] = json.dumps(geometry) + else: + meta['geolocated'] = False + + return meta \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_s3_utils-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_s3_utils-checkpoint.py new file mode 100644 index 0000000..0db89e8 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_s3_utils-checkpoint.py @@ -0,0 +1,39 @@ +import os +import configparser + +def S3Login(): + if not("MAAP_EMAIL" in os.environ and "MAAP_PASSWORD" in os.environ): + config = configparser.ConfigParser() + #you have to fill in the auth.ini file with your 
credentials, check the README.md + config.read('/projects/.maap/auth.ini') + + #Location of the credentials + email = config['auth']['email'] + password = config['auth']['password'] + + #Set env variables + os.environ["MAAP_EMAIL"] = email + os.environ["MAAP_PASSWORD"] = password + + os.system("maap-s3.py login $MAAP_EMAIL $MAAP_PASSWORD") + +def S3Upload(source, target): + os.system("maap-s3.py upload {} {}".format(source, target)) + +def uploadLocalFileToS3(localFilePath): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + targetBasePath = config['edav']['user_data_remote_s3_path'] + uploadLocation = config['edav']['user_data_upload_location'] + + S3Login() + target = "{}/{}/{}/{}".format(targetBasePath, uploadLocation, os.environ["MAAP_EMAIL"].replace('@', '_'), os.path.basename(localFilePath)) + + S3Upload(localFilePath, target) + return target + + + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/ingest-user-product-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/ingest-user-product-checkpoint.py new file mode 100644 index 0000000..b9af011 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/ingest-user-product-checkpoint.py @@ -0,0 +1,65 @@ +#!/projects/.conda/envs/maap/bin/python + +import os, sys, argparse +import configparser +import logging + +sys.path.append('./') +from edav_ingest import ingestProduct, uploadLocalProductAndIngest +from edav_raster_utils import extractRasterMetadataFromProduct + +logging.basicConfig(stream=sys.stdout, level=os.environ.get("LOG_LEVEL", "INFO")) + +def ingestUserProduct(productLocation, title, description, productDate, datasetId=None, isLocal=False): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + sourceBasePath = config['edav']['user_data_local_s3_mount'] + targetBasePath = config['edav']['user_data_remote_s3_path'] + + if isLocal is True: + localFilePath = productLocation + 
else: + localFilePath = "{}/{}".format(sourceBasePath, productLocation) + + productMeta = extractRasterMetadataFromProduct(localFilePath) + productMeta['title'] = title + productMeta['dataset_description'] = description + productMeta['productDate'] = productDate + if datasetId is not None: + productMeta['datasetId'] = datasetId + productMeta['subDatasetId'] = productMeta['datasetId'] + + if isLocal is True: + result = uploadLocalProductAndIngest(productMeta) + else: + remoteFilePath = "{}/{}".format(targetBasePath, productLocation) + productMeta['source'] = "/vsis3/{}".format(remoteFilePath) + result = ingestProduct(productMeta) + + if result['status'] is 'error': + logging.error(result['message']) + else: + logging.info(result['message']) + + logging.debug(result['response'].text) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Ingest a product in edav user data catalogue') + parser.add_argument('--title', help='A friendly name for the dataset', required=True) + parser.add_argument('--description', help='The dataset description', required=True) + parser.add_argument('--product_date', help='The product date as ISO string', required=True) + parser.add_argument('--dataset_id', default=None, help='Set the dataset id (default is to derive it from the file name)') + parser.add_argument('--is_local', action='store_true', help='A flag indicating if the product is local (default to false). When set the product will be first uploaded to S3 and then ingested') + parser.add_argument('product_location', help='The product location. 
In case of remote file (--is-local not set), it should be the relative path from the S3 user data mount point') + args = parser.parse_args() + + ingestUserProduct( + productLocation=args.product_location, + title=args.title, + description=args.description, + productDate=args.product_date, + datasetId=args.dataset_id, + isLocal=args.is_local + ) diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_form.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_form.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4de150d2fd54cfeecf2e1bc7aed9d4c977758932 GIT binary patch literal 4209 zcma)9OK%&=5uO)^!>68>rM0t5d#yNBqDSm?9*Wl3^?DU)VQUpjSbOI}jMyV-<lzj{ z)3hy`L=H9}8i|q|atM%%bW9Kg`7t@oH7B0~1VK*us%J=1UN0ODr>45Py1Kfnx~lq4 zsgzgn`}KdW?EGa)QT|Dd{!at)3EqeSFomhUVkckKR%KhWb$m77Xk_e6!?evt*3PQL zqx)7PXXhkt`1wY`E;NdE5$y~!Un+KqWtoL<ndMj>-w~#6Db>Pj%%p5o^*GDCsOdY0 zv31|!k6HLMh>f-F`}eA9TydWX?$pHtClW5d?+S-G!dc~EW0QxhRTot)zSL^^p~LQl zb;nPsp9X9_*mENhmpu7R>2+Er{Y<<iygJ@!6I7ubC@1Qva-cC4k9MqeHKzA6k4?UI zI3;`0V)~52j8pVIRDwxE5ys;@UsV-)zE%S5ks>k&CN29|?drg@EGuQ~V{CQ?9L%Ck zV1<s(ipQB(#Ghp)$@3fVlv18D8yWBvz+<sd$@6#cjHWzeY<$3@fhWf%B+oy<Ga-2j zY?58Tj8n%(*N{>!N{)Yn<6<ghn$3XcJ?L}k*nCAg=Q|p^%nIx~$NDQ3?Sd$Z5__M0 z@KO<F>`mo`hDYsaCnN02sme${lU=1Xvk#ArS2R;H=GtJ)?9iC+zGaMwF{A7{X8oR& zmSI&kcdW7RLtegP9E`ETW9#=S{~Igx^xC+{oJ^c5uPpWhHvdw2l{-?7)FbUkKQfLo zN9IxX$U4d$<&O%xMYg~eUs~NvFMl%G$(*XHA}(O`6yA$HWlufD3V{^{nAXQSMjz|x z15E81J#$Y_N96}}i?BWGRDFRNm;(1++Tbdj3;t;~yr3oA*w_zSJT5d7$Gz(aw>>`l z_55sOewJ;|t}V~rU!L8v+xo4gS$KZtsmH{AoFjjaT(Qn#r9Hmoii+5GD=D$!v33!H zh@rB6cW!yLnu#+{9KYpa9^a|E`=QTVOv>D-&b_Aa!XUPA+?*ZXWwD7v7q-uWhf z_JudaC*<rl_0BExQLLwZc|C|+E-KDIi6PnT(%rL(=9caJo*Vi&m%?Rs{^tXHyK3+< zgh`2f449qeuEU^<ZDB~m75uPG10*)L+@2oH3gQ=AUv5;Ip?|m+2Cp-;$>xx_5fV%? 
zV}plJBR&qaT}}f4DUGF6^`Qa8TPIu5kG>Qnw(v$4KnJn#PX-LCZN%ErjXx`}jq34> z=#t>SpDx0p2X0+Ni_C3=^T|d>i!O7XEVi&$^JK00Bo3YHt?h=qvCu@^Tk)O74s&jQ z45|OZtF*80xq-`}Y!b&{w^d?Zq22VO+dz^LucwJyn_aVwTT9JnRU^)Z(#%n^LRy^M zg8yrU9BCn0R#oLB`D~qhhMMmId`8G6fY&PknESqMZnbtAo`?-E*bUqH8_=Y(L{C-Y z1u)?ZIDF3wV0Ut~2FmJnf3i^QStep5#>th55@Bou+Le4=#}I%0!ErfsyVD^b%4 zDpJVD*XwXMzFgrhyMFs~&Iy0sU0EcS+b}v=mQ&1+P1<`$z?j*F8%55Z`^Hk^F&wIz zOEPLH$2JB=GJ~k?rFYop8-oY?PXcmyCrH{I_*)Nw?RPg+d{2~9IABjXP@}8892=~& zR{-TBP!M*L@Y2&jSp`KOP*hMJfnp3OIw%LAWcn0l_VP6iDJfsmnFVOjTu27F6*()g zJTS}#tN;wN@V^p7ySSnRCVf9vSm`9w!+a+uE1x1HqTf7FPboo;U=&i`3$?2vWzlRj zkIu$uwvNiikw7P8D4*`-(4L%8NIQ`g7Fri(l!IJi-{7*y4)-U$C;3y!{}4g;7s><W zQ4(yYHri8Xaj@b=@PJ>oJh(x7Oh&JBq>DpA>RiY4aFjULGue+t!_;j%8#TQE=af?| z!HWQPF>39ks;o0kMz$v?0FXlrN6O;!?)LNUGX6_TcH8PF0!}5`D*pg5HY4GPRupHX z7xE8@W^p%ah6uscLR_r-uET4Hn$4CtJMK7z^)TR9z|ODAF_GZA!fUvR{~JUz1S+f8 zIEcakDN5amO$cuJqFRj0sk-ZI)pLC&Q;?gWqeCl5D5ZY5PI)QEC8&Ye?Zky7`K7&d z+LK6OrdnSkWoXY3KJLhP5a(sukt32bNs%WmV=SH7RKP>fqBQ_TweXmDjc<dRi5^qS ztMr&^5$*RiE2WmzqB^M+)%N^5PW1WttJBD%2+y>wvDp&!Abc7liOH-RTi9_#VRV)5 z!1A9ztGh|=%_6Jsxgv_qdr9gwwqXa`BIqCm+L^B$-$Sy53%Q$K{jtk!>n;zQNmcB7 zC~5VE(~L9RL1D}1Xlu-5<AyxI{8IXaZ9(a}4>8j6w(-@S@R@C_0U~97gt7cb1jy&? z!Y)}mQOh2IS<}Rw7-E_DDCt#;e39@a0zV{hgTPG!QsOPbq}U%5Mh50TA@EZI(v}zA zy44gEqu&B3Fo>mU>XcfB*@~K>j-y47^-g@4BjN9ln}UmI>L?_fVTsgny@+`%&C;(f z<lz>HqYy0P<>Y1OqBkWFN`g_6nL3C_T~(+ZRiH3V%Rdzt8%`-q4(D{=z&RU9i$A)n zP%UQn3H_VRn@Q-}TXzbj07>ZIcK$%R(5a@gz`wwEpl%io>XGX10ul0-(}l7pxam+m zjKHcPDC1_+)l%F5uCvUE-l2r6bq$H@63+;ugA~&>nTdW>)E$#$S8$K%n8F+^-`Z$T zpVdVR4aaH9f@r}DJlmks$eyO!s7A4-CX1R{bhFk7TY(tv;i#yDOI=Mz?6)(|yNl1e z?cCy%$ct_QMT|tisk_xIqG1^9dV5WWLvCFBOtv<YmO<5}42DBCp}nbQ`8qzanGCke zi8wm6o0!lp<5c3j4%#KD>Z<3b-HCqvc5(m)rD5Xiv)^XI2Xbv`%K0O-h^SO08|D#@ zit@FzvdT9>t&Z}0`0$?r*m<vcm<AI52?+7X`sSh74}*pDCVf`uaI(HVMmt{P@)BKZ z3D1w@KG+3GPfAniqntJ+aar;sIrWfF%B9I8Y^^kK|MuP6R4_}$DGXaOl#XiFuau%? 
F{0}LiMI!(J literal 0 HcmV?d00001 diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_ingest.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_ingest.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14784db28c2ad3b7e96a29d05320cd5a26254626 GIT binary patch literal 1147 zcmaJ=PjAyO6t^8G&9=5f{LxxKXdHXkdf0VB`y(Muf@#v=5+PM4c1uc92iuvnlAgvD zJ^_bLocJbu246Yt8z4?RCtWu#bM&6y^NZi#pATBCRRrtXpXcv?P=tOv<$4hi_JFiw z9C4hYsNQjeE1g87+P%nwouoALBOfE~ary;C0rz<TdxPUs)N5RU7j)di_Cb-SlJQqE zX6ef$6>sF}PK=!umJbvF9RleTm_c)ViDtOObHec@#;C-cbRkF=%}9b~UWrN|PP=Fq z8E;HW!hPe<gVMXi3Fg69GNUCOH+bWtr{3|9uauOpe)mohU*pZCj3CL@@wC&=l^l!M z=>B%bSl$<$ooy$R#8M|k8-*5Xz?or#WHJ)kByut+RBBh;nHJ`V+b|P`ITfm<Zom2k zl~`XWCDpWZq9iY3v$?##Ib_fW-t%mr4KszdjZA3GM#6?l=v+>;unk?rF`Q1HUf-_| zhDDm5FMNTI&^BW^878BgDah0|4=e4Y)~FH;@>Akd0jX<CbE%D`V=X7v=S7z5Xsu3R z5KAt4zFmiQxNKi9O0q%iK3ZR<zn>>>RjS<BELJ~}+5qYA;Y}mC$mEueRgyzjT^IYV ziCeBSTeX+}tc%8O=jXOnn->*_;8#BehDeB4Nt00ABpr|lCOD*yt1eiVXaYzh52!-Z z9td}%$t-YmfoC4~=hPv{Aq#Njm*~R5IJg5L%7wC>8-OUN+lYbQ8RX1-fZ@tQ#bUbB zKhufc>3^Q}0@Z>$s_odiWA4$}U?^3_OjS#ds)x?-$Qjy(sgW=TfU^_u7`5vN?kPDO zR*kP&$Nm1~)=OKoaDT#&D%d`UA9NEK!gVj3xJ}eE&;de3>N>IepsRbpqL!PRE2S_= ks;fM!{HiOr0=wC(Dtfq^0di9D!oBS(R0yHqgTaN}e=jjNN&o-= literal 0 HcmV?d00001 diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_raster_utils.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_raster_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c069341dc4cf9503b3b56414a42f1523abce7dd2 GIT binary patch literal 1037 zcmZWoJ8u**5VpPE&E6}o0t5mS$}2*EAPN*A1R^L>f&z&Ua&y|-ah5&pu2;5?Jh<!V z5k19S$zRB?pms~eKR{HB@4`#in$Pp}c*geQPNzw5eEfd1zZVkn)eFA{L*@~Zxs3n> zXhFn$rGlc&3R*J3=v)_w00Q9eiQqsFNH_e3QRI1-I)0q!I;CF35h1mao*<bo2$oFf z2zQ^d59ed#EE@z9KErTCrgTbyT_F%mq7fY|pu{1B2o@lQMn<9esWE0~iS;?OMhx0p z1f|9}u+2%!>!AaSIfbQBFlZxnGKRSL54SYadFvkz{jw1EU;f0V0ViWV4(;+}WyGfx zv#e~9Dh7U$j~$Q~<Ry8HoxsXhpF66xulJ0-8*hu~$<SC;is)rxtkjNfo^^H;ySA&U 
zZ0#^Fr3m{;1$Vj)7a29wsdNoUY+|I{0kLfIDl25K92PeBeu8H@2N8QbJguc@UpV)y zmsWmBfIC+vSCzQ_pQ;Daq&lyyH!p!aN^tjAxjf!eAP-$Lld4c@VkJ19SGhfFJl#Kd z_Tb*;-TPu;UlpgBs=CbaS;khXl!m-QcjICVEv40`-BlOsS;4t-T-CC2Oc}@P#J+K{ zsf*mo6YJtW)>$Q`bkUL0Wnx{Js$peCkZL2MKJs9+e~Mz!>ZCH>tLKwAUdSWs(8XeR zU8wWyjdg)lwc`V$Di^}AtW7r(ty(J>rgq<_Ubqk($73df_xtKZ`%mC#_L=Bw$o<&a zK&OFBt=Rxss`YuV%!Y*I?Z(hZz3w}@ju#`1{m0sym)slLyfC-wQ@4gi>Lj)EyWhi< zK6G2Fa({j({~yd<1VR@XqnButvfs1GA{rxcoDpNJiB><a@?{n?eFLMqq3+<KTONF$ h^kom;G%MyWxQ;p(Z<W}LLO%2l)WirejcFUx`~(UkBsBm4 literal 0 HcmV?d00001 diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_s3_utils.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_s3_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..108448b444de66767739d6c7db0c74b1caab7806 GIT binary patch literal 1193 zcmZWo%We}f6tz9irb(%2McV~Zkq}Y@X%^X5s1J&S)Rw9NA!QRx?50yDnUU=jwVkZW zC-4Eul8@j=yk*57psu*rlhPur`1*KkUmxFdvQw*72()j%pM3h^6Y>)$w~YbVg05dc z5Jb?Fbjp!-s9?hTN;*vVB7o5op@?Ag1#Odh{0mNy!yZO#-mLG!s6Rpw)Q~g!IaJGn zl}H~OHu0upObRmbg*T-Xknq=t2-e7$&Tt5r74Ed4o1}231=}QsOac+%DM66Ip{*Qj zZy$B{4z}OycWMqFZMWMW-X6cYTy5qmJCi-5n;Qep^Clme(~V@9SRauM<bWrsjdQN` zmrRMv=eVn(TO0YsdYbi<;ri3N>@$3uy&hVYX&cGmd7`qRt@g6vNz%``(o)&#uA3c| zGhazAY^X1^k%PKt!*Z_zDToRnd?%^~!_D3uV<87wL-$mY1AQs@d2<Ab8-g3&NQQl> z%|EMM-Rh1^l4{^?qjqZ_>0^Gq2!YUL8ncR5rB$y&D@;9rZ#`5V8;&7#r`sa{P-QCU zgcYQqQ}(yo?C^5sUWG<^nsKpyHHKf^Q<y=`L$KjVrUu;989X@~sh+f<;i@l<dt@0F zaAVwVy?05LVWu4&s+IuvC<JU+oEW;JCIBEcun#SOo}kV?U{bJ;S6aqlu+!Kx-ZcT6 z1QWktg=d1ZP=Ku_k%&Y*WiXE?a|M~wsXz8j1)l0(GccnMwwgQq-#aKl=^aFN71ijX z8eIiS8Hobq+?;mS?pDeQb<?cJO_B{S=dZ@i+n>Vc-q0;eyY+d6Z&Y&_@Q>=`Q7Bwl z)d-M{l+05O#q{wS8-L<j4*5X3g4F|C!)wylNh*(ET~+bSLRt46*oq%J3niod86ou$ zuR!PB6J_ackIVKhYqwTl%gjhI*avLHLi&tVsCo=5^`NBb9)Zqsl-$z2<nEeM_RT>i SMyY&>Dr-CygFEBJ*!u%{nI$a% literal 0 HcmV?d00001 diff --git a/edav/user-data-ingest-scripts/edav_form.py b/edav/user-data-ingest-scripts/edav_form.py new file mode 100644 index 0000000..9129c22 --- /dev/null +++ 
b/edav/user-data-ingest-scripts/edav_form.py @@ -0,0 +1,171 @@ +import configparser +import datetime +import ipywidgets as widgets +from IPython.display import display, Markdown, HTML + +from edav_raster_utils import extractRasterMetadataFromProduct +from edav_ingest import uploadLocalProductAndIngest, ingestProduct + +def generateIngestionForm(meta, on_submit): + productDate = datetime.datetime.utcnow().replace(minute=0, hour=0) + + if 'productDate' in meta: + try: + productDate = datetime.datetime.strptime(meta['productDate'], '%Y-%m-%dT%H:%M:%SZ') + except BaseException: + pass + + fieldLayout = widgets.Layout(width='50%') + + datasetIdField = widgets.Text( + value=(meta['datasetId'] if 'datasetId' in meta else ''), + placeholder='Set the dataset id', + description='Dataset ID*:', + disabled=False, + layout=fieldLayout + ) + titleField = widgets.Text( + value=(meta['title'] if 'title' in meta else ''), + placeholder='Set the dataset title', + description='Title*:', + disabled=False, + layout=fieldLayout + ) + descriptionField = widgets.Textarea( + value=(meta['description'] if 'description' in meta else ''), + placeholder='Insert a dataset description', + description='Description*:', + disabled=False, + layout=fieldLayout + ) + geolocatedField = widgets.Valid( + value=(meta['geolocated'] if 'geolocated' in meta else False), + description='Geolocated', + disabled=True, + readout='' + ) + geometryField = widgets.Textarea( + value=(meta['geometry'] if 'geometry' in meta else ''), + description='Geometry*:', + placeholder='Insert a GeoJSON polygon', + disabled=True, + rows=5, + layout=fieldLayout + ) + dateField = widgets.DatePicker( + value=productDate, + description='Product date*:', + disabled=False + ) + hourField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', range(0, 24))), + value=productDate.strftime('%H'), + disabled=False, + layout=widgets.Layout(width='50px') + ) + minuteField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', 
range(0, 60))), + value=productDate.strftime('%M'), + disabled=False, + layout=widgets.Layout(width='50px') + ) + submitButton = widgets.Button( + description='Submit', + disabled=False, + button_style='info', # 'success', 'info', 'warning', 'danger' or '' + layout=widgets.Layout(width='50%', margin='20px 0 0 0') + ) + + if geolocatedField.value is False: + geometryField.disabled=False + + output = widgets.Output() + + def set_error_message(message): + display(Markdown("<span style='color: red'>Error: {}</span>".format(message))) + + def on_form_submit(b): + with output: + output.clear_output() + if not datasetIdField.value: + set_error_message('Dataset ID is required') + return + if not titleField.value: + set_error_message('Title is required') + return + if not descriptionField.value: + set_error_message('Description is required') + return + if not dateField.value: + set_error_message('Product date is required') + return + if not geometryField.value: + set_error_message('Geometry is required') + return + + submitButton.disabled = True + submitButton.icon = 'spinner' + submitButton.button_style = '' + + meta['datasetId'] = datasetIdField.value + meta['subDatasetId'] = datasetIdField.value + meta['title'] = titleField.value + if (descriptionField.value): + meta['dataset_description'] = descriptionField.value + else: + del meta['dataset_description'] + meta['productDate'] = "{}T{}:{}:00Z".format(dateField.value.strftime('%Y-%m-%d'), hourField.value, minuteField.value) + + result = on_submit(meta) + + submitButton.disabled = False + submitButton.icon = '' + submitButton.button_style = 'info' + + if result['status'] is 'error': + set_error_message(result['message']) + display(HTML(result['response'].text )) + else: + display(result['message']) + display(result['response'].json()) + + + submitButton.on_click(on_form_submit) + + display(widgets.VBox([ + widgets.HBox([datasetIdField]), + widgets.HBox([titleField]), + widgets.HBox([descriptionField]), + 
widgets.HBox([geolocatedField]), + widgets.HBox([geometryField]), + widgets.HBox([dateField, hourField, minuteField]), + widgets.HBox([submitButton]), + widgets.HBox([output]) + ])) + + + +def generateIngestionFormForLocalProduct(productLocation): + productMetadata = extractRasterMetadataFromProduct(productLocation) + print(productMetadata) + generateIngestionForm(productMetadata, uploadLocalProductAndIngest) + + +def generateIngestionFormForRemoteProduct(productLocation): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + sourceBasePath = config['edav']['user_data_local_s3_mount'] + targetBasePath = config['edav']['user_data_remote_s3_path'] + + localFilePath = "{}/{}".format(sourceBasePath, productLocation) + remoteFilePath = "{}/{}".format(targetBasePath, productLocation) + + meta = extractRasterMetadataFromProduct(localFilePath) + + + meta['source'] = "/vsis3/{}".format(remoteFilePath) + generateIngestionForm(meta, ingestProduct) + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/edav_ingest.py b/edav/user-data-ingest-scripts/edav_ingest.py new file mode 100644 index 0000000..b0735a1 --- /dev/null +++ b/edav/user-data-ingest-scripts/edav_ingest.py @@ -0,0 +1,48 @@ +import os +import configparser +import json +import requests + +from edav_s3_utils import uploadLocalFileToS3 + +def ingestProduct(productMetadata): + + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + ingestionApiUrl = config['edav']['user_data_ingestion_url'] + + data = {} + data[productMetadata['datasetId']] = productMetadata + + r = requests.post( ingestionApiUrl, data={"metadata": json.dumps(data)}) + if r.status_code != 200: + return { + "status": "error", + "message": "Product ingestion failed", + "response": r + } + else: + return { + "status": "success", + "message": "Product succesfully ingested", + "response": r + } + + +def uploadLocalProductAndIngest(productMetadata): + config = 
def extractRasterMetadataFromProduct(productPath):
    """Build the ingestion metadata dictionary for a raster product.

    The product is opened with rasterio. When it carries a CRS, its bounding
    box is reprojected to EPSG:4326 and stored, JSON encoded, under the
    ``geometry`` key.

    Args:
        productPath: location of the raster, as accepted by ``rasterio.open``.

    Returns:
        A dictionary with the default ingestion fields (``source``,
        ``datasetId`` derived from the file name without extension,
        ``single_multiband`` holding the band count as a string, ...), a
        boolean ``geolocated`` flag and, for geolocated products only, the
        ``geometry`` footprint.
    """
    with rasterio.open(productPath, 'r') as product:
        meta = {
            "source": productPath,
            "datasetId": os.path.splitext(os.path.basename(productPath))[0],
            "single_multiband": "{}".format(product.count),
            "grid": False,
            "gridType": "Custom",
            "dataset_type": "Raster",
            "dataset_dimension": "3",
            "dataset_dimension_description": "Lat Long Time",
            "defaultViewMode": ["band1"]
        }

        if not product.crs:
            # No CRS: the footprint cannot be computed.
            meta['geolocated'] = False
            return meta

        meta['geolocated'] = True

        # Closed ring following the corners of the product bounding box,
        # expressed in the product CRS.
        bbox = product.bounds
        footprint = {
            'type': 'Polygon',
            'coordinates': [[
                [bbox.left, bbox.bottom],
                [bbox.right, bbox.bottom],
                [bbox.right, bbox.top],
                [bbox.left, bbox.top],
                [bbox.left, bbox.bottom]
            ]]
        }

        # Use an explicit CRS object rather than the deprecated
        # {'init': 'EPSG:4326'} PROJ4 dictionary syntax.
        geometry = warp.transform_geom(
            product.crs,
            rasterio.crs.CRS.from_epsg(4326),
            footprint
        )
        meta['geometry'] = json.dumps(geometry)

    return meta
def S3Upload(source, target):
    """Upload a local file to S3 with the ``maap-s3.py`` command line tool.

    Runs ``maap-s3.py upload <source> <target>`` through the shell. The exit
    status of the command is not checked.

    Args:
        source: path of the local file to upload.
        target: destination passed to ``maap-s3.py upload``.
    """
    # Local import so the block does not depend on a new module-level import.
    import shlex

    # Quote both arguments: without it, a path containing spaces is split
    # into several arguments and shell metacharacters (';', '$', '`', ...)
    # are interpreted by the shell instead of being part of the path.
    command = "maap-s3.py upload {} {}".format(
        shlex.quote(str(source)),
        shlex.quote(str(target))
    )
    os.system(command)
def buildArgumentParser():
    """Create the command line parser of the product ingestion script.

    Returns:
        An ``argparse.ArgumentParser`` accepting the required ``--title``,
        ``--description`` and ``--product_date`` options, the optional
        ``--dataset_id`` option and ``--is_local`` flag, and the positional
        ``product_location``.
    """
    parser = argparse.ArgumentParser(description='Ingest a product in edav user data catalogue')
    parser.add_argument('--title', help='A friendly name for the dataset', required=True)
    parser.add_argument('--description', help='The dataset description', required=True)
    parser.add_argument('--product_date', help='The product date as ISO string', required=True)
    parser.add_argument('--dataset_id', default=None, help='Set the dataset id (default is to derive it from the file name)')
    parser.add_argument('--is_local', action='store_true', help='A flag indicating if the product is local (default to false). When set the product will be first uploaded to S3 and then ingested')
    # The help text must name the flag exactly as declared above
    # (--is_local); the hyphenated spelling is not accepted by the parser.
    parser.add_argument('product_location', help='The product location. In case of remote file (--is_local not set), it should be the relative path from the S3 user data mount point')
    return parser


if __name__ == '__main__':

    args = buildArgumentParser().parse_args()

    ingestUserProduct(
        productLocation=args.product_location,
        title=args.title,
        description=args.description,
        productDate=args.product_date,
        datasetId=args.dataset_id,
        isLocal=args.is_local
    )