diff --git a/edav/user-data-ingest-from-module.ipynb b/edav/user-data-ingest-from-module.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..da11d4ac94fceb5a826a5efb2eed810eb91304b1 --- /dev/null +++ b/edav/user-data-ingest-from-module.ipynb @@ -0,0 +1,151 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4de51eb6", + "metadata": {}, + "source": [ + "<h1>Sample user data ingestion notebook</h1>" + ] + }, + { + "cell_type": "markdown", + "id": "8b45f9ee", + "metadata": {}, + "source": [ + "As a first step add the ingestion module location to path. In the future the ingestion module could be automatically put in the python module path." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5a994d73", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "sys.path.insert(0, os.path.abspath('./user-data-ingest-scripts'))" + ] + }, + { + "cell_type": "markdown", + "id": "11cb449a", + "metadata": {}, + "source": [ + "Then import the product ingestion utils from the edav_ingest module. The module uses the configuration information set in `/projects/.maap/edav.ini`.\n", + "\n", + "It depends on rasterio and ipywidgets libraries that are currently not available in the conda environment. To prepare the environment the following commands were run before importing the script:\n", + "\n", + "`conda install --freeze-installed rasterio`\n", + "\n", + "`conda install --freeze-installed ipywidgets`\n", + "\n", + "`jupyter labextension install @jupyter-widgets/jupyterlab-manager`" + ] + }, + { + "cell_type": "markdown", + "id": "3987a8cd", + "metadata": {}, + "source": [ + "<h3>Local file ingestion</h3>\n", + "\n", + "First we run the local ingestion script passing the location of a local product file. This will generate a form. Complete the missing information and submit it to complete the ingestion. 
The function uses the `maap-s3.py` script internally to upload the product to S3 (in `[user_data_remote_s3_path]/[user_data_upload_location]/[USER_EMAIL]/` as specified in `/projects/.maap/edav.ini` e.g. `maap-scientific-data/shared/edav/edav_esa-maap.org`). Be sure it is available and that the information in `/projects/.maap/auth.ini` is correct" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a089ae96", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b97de56bbeea4429b6e7caf1a2739756", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(Text(value='georef_UK_classif', description='Dataset ID*:', layout=Layout(width=…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from edav_form import generateIngestionFormForLocalProduct\n", + "\n", + "generateIngestionFormForLocalProduct('/projects/data/georef_UK_classif.tif')" + ] + }, + { + "cell_type": "markdown", + "id": "bb1cbec4", + "metadata": {}, + "source": [ + "<h3>Remote file ingestion</h3>\n", + "\n", + "To ingest a product already in the S3 user data location, run the remote ingestion script passing the relative location of the product (relative to the `user_data_local_s3_mount` set in `/projects/.maap/edav.ini`). The assumption is that the S3 location is locally mounted in the jupyter env (e.g. 
in `/projects/s3-drive/user-data`, configurable in `/projects/.maap/edav.ini`) in order to extract the required information from the product metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "13f4e740", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5f1d32eb92124389907392e2f02baf15", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(Text(value='georef_Ecosse_classif_test', description='Dataset ID*:', layout=Layo…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from edav_form import generateIngestionFormForRemoteProduct\n", + "\n", + "generateIngestionFormForRemoteProduct('GPU_TEST_TIF/georef_Ecosse_classif_test.tiff')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87872c47", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Maap", + "language": "python", + "name": "maap" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_form-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_form-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..9129c22fc10e167e0744ed96290709ead8a74a6e --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_form-checkpoint.py @@ -0,0 +1,171 @@ +import configparser +import datetime +import ipywidgets as widgets +from IPython.display import display, Markdown, HTML + +from edav_raster_utils import extractRasterMetadataFromProduct +from edav_ingest import uploadLocalProductAndIngest, ingestProduct + +def 
generateIngestionForm(meta, on_submit): + productDate = datetime.datetime.utcnow().replace(minute=0, hour=0) + + if 'productDate' in meta: + try: + productDate = datetime.datetime.strptime(meta['productDate'], '%Y-%m-%dT%H:%M:%SZ') + except BaseException: + pass + + fieldLayout = widgets.Layout(width='50%') + + datasetIdField = widgets.Text( + value=(meta['datasetId'] if 'datasetId' in meta else ''), + placeholder='Set the dataset id', + description='Dataset ID*:', + disabled=False, + layout=fieldLayout + ) + titleField = widgets.Text( + value=(meta['title'] if 'title' in meta else ''), + placeholder='Set the dataset title', + description='Title*:', + disabled=False, + layout=fieldLayout + ) + descriptionField = widgets.Textarea( + value=(meta['description'] if 'description' in meta else ''), + placeholder='Insert a dataset description', + description='Description*:', + disabled=False, + layout=fieldLayout + ) + geolocatedField = widgets.Valid( + value=(meta['geolocated'] if 'geolocated' in meta else False), + description='Geolocated', + disabled=True, + readout='' + ) + geometryField = widgets.Textarea( + value=(meta['geometry'] if 'geometry' in meta else ''), + description='Geometry*:', + placeholder='Insert a GeoJSON polygon', + disabled=True, + rows=5, + layout=fieldLayout + ) + dateField = widgets.DatePicker( + value=productDate, + description='Product date*:', + disabled=False + ) + hourField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', range(0, 24))), + value=productDate.strftime('%H'), + disabled=False, + layout=widgets.Layout(width='50px') + ) + minuteField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', range(0, 60))), + value=productDate.strftime('%M'), + disabled=False, + layout=widgets.Layout(width='50px') + ) + submitButton = widgets.Button( + description='Submit', + disabled=False, + button_style='info', # 'success', 'info', 'warning', 'danger' or '' + layout=widgets.Layout(width='50%', margin='20px 0 0 0') + ) + + if 
geolocatedField.value is False: + geometryField.disabled=False + + output = widgets.Output() + + def set_error_message(message): + display(Markdown("<span style='color: red'>Error: {}</span>".format(message))) + + def on_form_submit(b): + with output: + output.clear_output() + if not datasetIdField.value: + set_error_message('Dataset ID is required') + return + if not titleField.value: + set_error_message('Title is required') + return + if not descriptionField.value: + set_error_message('Description is required') + return + if not dateField.value: + set_error_message('Product date is required') + return + if not geometryField.value: + set_error_message('Geometry is required') + return + + submitButton.disabled = True + submitButton.icon = 'spinner' + submitButton.button_style = '' + + meta['datasetId'] = datasetIdField.value + meta['subDatasetId'] = datasetIdField.value + meta['title'] = titleField.value + if (descriptionField.value): + meta['dataset_description'] = descriptionField.value + else: + del meta['dataset_description'] + meta['productDate'] = "{}T{}:{}:00Z".format(dateField.value.strftime('%Y-%m-%d'), hourField.value, minuteField.value) + + result = on_submit(meta) + + submitButton.disabled = False + submitButton.icon = '' + submitButton.button_style = 'info' + + if result['status'] is 'error': + set_error_message(result['message']) + display(HTML(result['response'].text )) + else: + display(result['message']) + display(result['response'].json()) + + + submitButton.on_click(on_form_submit) + + display(widgets.VBox([ + widgets.HBox([datasetIdField]), + widgets.HBox([titleField]), + widgets.HBox([descriptionField]), + widgets.HBox([geolocatedField]), + widgets.HBox([geometryField]), + widgets.HBox([dateField, hourField, minuteField]), + widgets.HBox([submitButton]), + widgets.HBox([output]) + ])) + + + +def generateIngestionFormForLocalProduct(productLocation): + productMetadata = extractRasterMetadataFromProduct(productLocation) + print(productMetadata) + 
generateIngestionForm(productMetadata, uploadLocalProductAndIngest) + + +def generateIngestionFormForRemoteProduct(productLocation): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + sourceBasePath = config['edav']['user_data_local_s3_mount'] + targetBasePath = config['edav']['user_data_remote_s3_path'] + + localFilePath = "{}/{}".format(sourceBasePath, productLocation) + remoteFilePath = "{}/{}".format(targetBasePath, productLocation) + + meta = extractRasterMetadataFromProduct(localFilePath) + + + meta['source'] = "/vsis3/{}".format(remoteFilePath) + generateIngestionForm(meta, ingestProduct) + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_ingest-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_ingest-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..b0735a1447c679a76fec999bc16bf1bf3adb97e3 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_ingest-checkpoint.py @@ -0,0 +1,48 @@ +import os +import configparser +import json +import requests + +from edav_s3_utils import uploadLocalFileToS3 + +def ingestProduct(productMetadata): + + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + ingestionApiUrl = config['edav']['user_data_ingestion_url'] + + data = {} + data[productMetadata['datasetId']] = productMetadata + + r = requests.post( ingestionApiUrl, data={"metadata": json.dumps(data)}) + if r.status_code != 200: + return { + "status": "error", + "message": "Product ingestion failed", + "response": r + } + else: + return { + "status": "success", + "message": "Product succesfully ingested", + "response": r + } + + +def uploadLocalProductAndIngest(productMetadata): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + targetBasePath = config['edav']['user_data_remote_s3_path'] + + remoteLocation = uploadLocalFileToS3(productMetadata['source']) + 
print(remoteLocation) + productMetadata['source'] = "/vsis3/{}".format(remoteLocation) + + return ingestProduct(productMetadata) + + + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_raster_utils-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_raster_utils-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..a6b49e342de8ecc9160d0ec642ec3ff19d6c59c9 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_raster_utils-checkpoint.py @@ -0,0 +1,41 @@ +import os +import rasterio +from rasterio import warp +import json + +def extractRasterMetadataFromProduct(productPath): + with rasterio.open(productPath, 'r') as product: + meta = { + "source": productPath, + "datasetId": os.path.splitext(os.path.basename(productPath))[0], + "single_multiband": "{}".format(product.count), + "grid": False, + "gridType":"Custom", + "dataset_type": "Raster", + "dataset_dimension":"3", + "dataset_dimension_description":"Lat Long Time", + "defaultViewMode":["band1"] + } + if product.crs: + meta['geolocated'] = True + + bbox = product.bounds + geometry = g = warp.transform_geom( + product.crs, + {'init':'EPSG:4326'}, + { + 'type': 'Polygon', + 'coordinates': [[ + [bbox.left, bbox.bottom], + [bbox.right, bbox.bottom], + [bbox.right, bbox.top], + [bbox.left, bbox.top], + [bbox.left, bbox.bottom] + ]] + } + ) + meta['geometry'] = json.dumps(geometry) + else: + meta['geolocated'] = False + + return meta \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_s3_utils-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_s3_utils-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..0db89e8e04e3a6105852b6657dc6aed53bd8a725 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/edav_s3_utils-checkpoint.py @@ -0,0 +1,39 @@ +import os +import configparser + +def S3Login(): + if 
not("MAAP_EMAIL" in os.environ and "MAAP_PASSWORD" in os.environ): + config = configparser.ConfigParser() + #you have to fill in the auth.ini file with your credentials, check the README.md + config.read('/projects/.maap/auth.ini') + + #Location of the credentials + email = config['auth']['email'] + password = config['auth']['password'] + + #Set env variables + os.environ["MAAP_EMAIL"] = email + os.environ["MAAP_PASSWORD"] = password + + os.system("maap-s3.py login $MAAP_EMAIL $MAAP_PASSWORD") + +def S3Upload(source, target): + os.system("maap-s3.py upload {} {}".format(source, target)) + +def uploadLocalFileToS3(localFilePath): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + targetBasePath = config['edav']['user_data_remote_s3_path'] + uploadLocation = config['edav']['user_data_upload_location'] + + S3Login() + target = "{}/{}/{}/{}".format(targetBasePath, uploadLocation, os.environ["MAAP_EMAIL"].replace('@', '_'), os.path.basename(localFilePath)) + + S3Upload(localFilePath, target) + return target + + + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/.ipynb_checkpoints/ingest-user-product-checkpoint.py b/edav/user-data-ingest-scripts/.ipynb_checkpoints/ingest-user-product-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..b9af0118495ceca63cb3454037e2f52859e5c474 --- /dev/null +++ b/edav/user-data-ingest-scripts/.ipynb_checkpoints/ingest-user-product-checkpoint.py @@ -0,0 +1,65 @@ +#!/projects/.conda/envs/maap/bin/python + +import os, sys, argparse +import configparser +import logging + +sys.path.append('./') +from edav_ingest import ingestProduct, uploadLocalProductAndIngest +from edav_raster_utils import extractRasterMetadataFromProduct + +logging.basicConfig(stream=sys.stdout, level=os.environ.get("LOG_LEVEL", "INFO")) + +def ingestUserProduct(productLocation, title, description, productDate, datasetId=None, isLocal=False): + config = configparser.ConfigParser() + 
config.read('/projects/.maap/edav.ini') + + sourceBasePath = config['edav']['user_data_local_s3_mount'] + targetBasePath = config['edav']['user_data_remote_s3_path'] + + if isLocal is True: + localFilePath = productLocation + else: + localFilePath = "{}/{}".format(sourceBasePath, productLocation) + + productMeta = extractRasterMetadataFromProduct(localFilePath) + productMeta['title'] = title + productMeta['dataset_description'] = description + productMeta['productDate'] = productDate + if datasetId is not None: + productMeta['datasetId'] = datasetId + productMeta['subDatasetId'] = productMeta['datasetId'] + + if isLocal is True: + result = uploadLocalProductAndIngest(productMeta) + else: + remoteFilePath = "{}/{}".format(targetBasePath, productLocation) + productMeta['source'] = "/vsis3/{}".format(remoteFilePath) + result = ingestProduct(productMeta) + + if result['status'] is 'error': + logging.error(result['message']) + else: + logging.info(result['message']) + + logging.debug(result['response'].text) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Ingest a product in edav user data catalogue') + parser.add_argument('--title', help='A friendly name for the dataset', required=True) + parser.add_argument('--description', help='The dataset description', required=True) + parser.add_argument('--product_date', help='The product date as ISO string', required=True) + parser.add_argument('--dataset_id', default=None, help='Set the dataset id (default is to derive it from the file name)') + parser.add_argument('--is_local', action='store_true', help='A flag indicating if the product is local (default to false). When set the product will be first uploaded to S3 and then ingested') + parser.add_argument('product_location', help='The product location. 
In case of remote file (--is-local not set), it should be the relative path from the S3 user data mount point') + args = parser.parse_args() + + ingestUserProduct( + productLocation=args.product_location, + title=args.title, + description=args.description, + productDate=args.product_date, + datasetId=args.dataset_id, + isLocal=args.is_local + ) diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_form.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_form.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4de150d2fd54cfeecf2e1bc7aed9d4c977758932 Binary files /dev/null and b/edav/user-data-ingest-scripts/__pycache__/edav_form.cpython-37.pyc differ diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_ingest.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_ingest.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14784db28c2ad3b7e96a29d05320cd5a26254626 Binary files /dev/null and b/edav/user-data-ingest-scripts/__pycache__/edav_ingest.cpython-37.pyc differ diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_raster_utils.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_raster_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c069341dc4cf9503b3b56414a42f1523abce7dd2 Binary files /dev/null and b/edav/user-data-ingest-scripts/__pycache__/edav_raster_utils.cpython-37.pyc differ diff --git a/edav/user-data-ingest-scripts/__pycache__/edav_s3_utils.cpython-37.pyc b/edav/user-data-ingest-scripts/__pycache__/edav_s3_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..108448b444de66767739d6c7db0c74b1caab7806 Binary files /dev/null and b/edav/user-data-ingest-scripts/__pycache__/edav_s3_utils.cpython-37.pyc differ diff --git a/edav/user-data-ingest-scripts/edav_form.py b/edav/user-data-ingest-scripts/edav_form.py new file mode 100644 index 
0000000000000000000000000000000000000000..9129c22fc10e167e0744ed96290709ead8a74a6e --- /dev/null +++ b/edav/user-data-ingest-scripts/edav_form.py @@ -0,0 +1,171 @@ +import configparser +import datetime +import ipywidgets as widgets +from IPython.display import display, Markdown, HTML + +from edav_raster_utils import extractRasterMetadataFromProduct +from edav_ingest import uploadLocalProductAndIngest, ingestProduct + +def generateIngestionForm(meta, on_submit): + productDate = datetime.datetime.utcnow().replace(minute=0, hour=0) + + if 'productDate' in meta: + try: + productDate = datetime.datetime.strptime(meta['productDate'], '%Y-%m-%dT%H:%M:%SZ') + except BaseException: + pass + + fieldLayout = widgets.Layout(width='50%') + + datasetIdField = widgets.Text( + value=(meta['datasetId'] if 'datasetId' in meta else ''), + placeholder='Set the dataset id', + description='Dataset ID*:', + disabled=False, + layout=fieldLayout + ) + titleField = widgets.Text( + value=(meta['title'] if 'title' in meta else ''), + placeholder='Set the dataset title', + description='Title*:', + disabled=False, + layout=fieldLayout + ) + descriptionField = widgets.Textarea( + value=(meta['description'] if 'description' in meta else ''), + placeholder='Insert a dataset description', + description='Description*:', + disabled=False, + layout=fieldLayout + ) + geolocatedField = widgets.Valid( + value=(meta['geolocated'] if 'geolocated' in meta else False), + description='Geolocated', + disabled=True, + readout='' + ) + geometryField = widgets.Textarea( + value=(meta['geometry'] if 'geometry' in meta else ''), + description='Geometry*:', + placeholder='Insert a GeoJSON polygon', + disabled=True, + rows=5, + layout=fieldLayout + ) + dateField = widgets.DatePicker( + value=productDate, + description='Product date*:', + disabled=False + ) + hourField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', range(0, 24))), + value=productDate.strftime('%H'), + disabled=False, + 
layout=widgets.Layout(width='50px') + ) + minuteField = widgets.Dropdown( + options=list(map(lambda i: f'{i:02}', range(0, 60))), + value=productDate.strftime('%M'), + disabled=False, + layout=widgets.Layout(width='50px') + ) + submitButton = widgets.Button( + description='Submit', + disabled=False, + button_style='info', # 'success', 'info', 'warning', 'danger' or '' + layout=widgets.Layout(width='50%', margin='20px 0 0 0') + ) + + if geolocatedField.value is False: + geometryField.disabled=False + + output = widgets.Output() + + def set_error_message(message): + display(Markdown("<span style='color: red'>Error: {}</span>".format(message))) + + def on_form_submit(b): + with output: + output.clear_output() + if not datasetIdField.value: + set_error_message('Dataset ID is required') + return + if not titleField.value: + set_error_message('Title is required') + return + if not descriptionField.value: + set_error_message('Description is required') + return + if not dateField.value: + set_error_message('Product date is required') + return + if not geometryField.value: + set_error_message('Geometry is required') + return + + submitButton.disabled = True + submitButton.icon = 'spinner' + submitButton.button_style = '' + + meta['datasetId'] = datasetIdField.value + meta['subDatasetId'] = datasetIdField.value + meta['title'] = titleField.value + if (descriptionField.value): + meta['dataset_description'] = descriptionField.value + else: + del meta['dataset_description'] + meta['productDate'] = "{}T{}:{}:00Z".format(dateField.value.strftime('%Y-%m-%d'), hourField.value, minuteField.value) + + result = on_submit(meta) + + submitButton.disabled = False + submitButton.icon = '' + submitButton.button_style = 'info' + + if result['status'] is 'error': + set_error_message(result['message']) + display(HTML(result['response'].text )) + else: + display(result['message']) + display(result['response'].json()) + + + submitButton.on_click(on_form_submit) + + display(widgets.VBox([ + 
widgets.HBox([datasetIdField]), + widgets.HBox([titleField]), + widgets.HBox([descriptionField]), + widgets.HBox([geolocatedField]), + widgets.HBox([geometryField]), + widgets.HBox([dateField, hourField, minuteField]), + widgets.HBox([submitButton]), + widgets.HBox([output]) + ])) + + + +def generateIngestionFormForLocalProduct(productLocation): + productMetadata = extractRasterMetadataFromProduct(productLocation) + print(productMetadata) + generateIngestionForm(productMetadata, uploadLocalProductAndIngest) + + +def generateIngestionFormForRemoteProduct(productLocation): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + sourceBasePath = config['edav']['user_data_local_s3_mount'] + targetBasePath = config['edav']['user_data_remote_s3_path'] + + localFilePath = "{}/{}".format(sourceBasePath, productLocation) + remoteFilePath = "{}/{}".format(targetBasePath, productLocation) + + meta = extractRasterMetadataFromProduct(localFilePath) + + + meta['source'] = "/vsis3/{}".format(remoteFilePath) + generateIngestionForm(meta, ingestProduct) + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/edav_ingest.py b/edav/user-data-ingest-scripts/edav_ingest.py new file mode 100644 index 0000000000000000000000000000000000000000..b0735a1447c679a76fec999bc16bf1bf3adb97e3 --- /dev/null +++ b/edav/user-data-ingest-scripts/edav_ingest.py @@ -0,0 +1,48 @@ +import os +import configparser +import json +import requests + +from edav_s3_utils import uploadLocalFileToS3 + +def ingestProduct(productMetadata): + + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + ingestionApiUrl = config['edav']['user_data_ingestion_url'] + + data = {} + data[productMetadata['datasetId']] = productMetadata + + r = requests.post( ingestionApiUrl, data={"metadata": json.dumps(data)}) + if r.status_code != 200: + return { + "status": "error", + "message": "Product ingestion failed", + "response": r + } + else: + return { + 
"status": "success", + "message": "Product succesfully ingested", + "response": r + } + + +def uploadLocalProductAndIngest(productMetadata): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + targetBasePath = config['edav']['user_data_remote_s3_path'] + + remoteLocation = uploadLocalFileToS3(productMetadata['source']) + print(remoteLocation) + productMetadata['source'] = "/vsis3/{}".format(remoteLocation) + + return ingestProduct(productMetadata) + + + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/edav_raster_utils.py b/edav/user-data-ingest-scripts/edav_raster_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a6b49e342de8ecc9160d0ec642ec3ff19d6c59c9 --- /dev/null +++ b/edav/user-data-ingest-scripts/edav_raster_utils.py @@ -0,0 +1,41 @@ +import os +import rasterio +from rasterio import warp +import json + +def extractRasterMetadataFromProduct(productPath): + with rasterio.open(productPath, 'r') as product: + meta = { + "source": productPath, + "datasetId": os.path.splitext(os.path.basename(productPath))[0], + "single_multiband": "{}".format(product.count), + "grid": False, + "gridType":"Custom", + "dataset_type": "Raster", + "dataset_dimension":"3", + "dataset_dimension_description":"Lat Long Time", + "defaultViewMode":["band1"] + } + if product.crs: + meta['geolocated'] = True + + bbox = product.bounds + geometry = g = warp.transform_geom( + product.crs, + {'init':'EPSG:4326'}, + { + 'type': 'Polygon', + 'coordinates': [[ + [bbox.left, bbox.bottom], + [bbox.right, bbox.bottom], + [bbox.right, bbox.top], + [bbox.left, bbox.top], + [bbox.left, bbox.bottom] + ]] + } + ) + meta['geometry'] = json.dumps(geometry) + else: + meta['geolocated'] = False + + return meta \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/edav_s3_utils.py b/edav/user-data-ingest-scripts/edav_s3_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..0db89e8e04e3a6105852b6657dc6aed53bd8a725 --- /dev/null +++ b/edav/user-data-ingest-scripts/edav_s3_utils.py @@ -0,0 +1,39 @@ +import os +import configparser + +def S3Login(): + if not("MAAP_EMAIL" in os.environ and "MAAP_PASSWORD" in os.environ): + config = configparser.ConfigParser() + #you have to fill in the auth.ini file with your credentials, check the README.md + config.read('/projects/.maap/auth.ini') + + #Location of the credentials + email = config['auth']['email'] + password = config['auth']['password'] + + #Set env variables + os.environ["MAAP_EMAIL"] = email + os.environ["MAAP_PASSWORD"] = password + + os.system("maap-s3.py login $MAAP_EMAIL $MAAP_PASSWORD") + +def S3Upload(source, target): + os.system("maap-s3.py upload {} {}".format(source, target)) + +def uploadLocalFileToS3(localFilePath): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + targetBasePath = config['edav']['user_data_remote_s3_path'] + uploadLocation = config['edav']['user_data_upload_location'] + + S3Login() + target = "{}/{}/{}/{}".format(targetBasePath, uploadLocation, os.environ["MAAP_EMAIL"].replace('@', '_'), os.path.basename(localFilePath)) + + S3Upload(localFilePath, target) + return target + + + + + \ No newline at end of file diff --git a/edav/user-data-ingest-scripts/ingest-user-product.py b/edav/user-data-ingest-scripts/ingest-user-product.py new file mode 100644 index 0000000000000000000000000000000000000000..b9af0118495ceca63cb3454037e2f52859e5c474 --- /dev/null +++ b/edav/user-data-ingest-scripts/ingest-user-product.py @@ -0,0 +1,65 @@ +#!/projects/.conda/envs/maap/bin/python + +import os, sys, argparse +import configparser +import logging + +sys.path.append('./') +from edav_ingest import ingestProduct, uploadLocalProductAndIngest +from edav_raster_utils import extractRasterMetadataFromProduct + +logging.basicConfig(stream=sys.stdout, level=os.environ.get("LOG_LEVEL", "INFO")) + +def 
ingestUserProduct(productLocation, title, description, productDate, datasetId=None, isLocal=False): + config = configparser.ConfigParser() + config.read('/projects/.maap/edav.ini') + + sourceBasePath = config['edav']['user_data_local_s3_mount'] + targetBasePath = config['edav']['user_data_remote_s3_path'] + + if isLocal is True: + localFilePath = productLocation + else: + localFilePath = "{}/{}".format(sourceBasePath, productLocation) + + productMeta = extractRasterMetadataFromProduct(localFilePath) + productMeta['title'] = title + productMeta['dataset_description'] = description + productMeta['productDate'] = productDate + if datasetId is not None: + productMeta['datasetId'] = datasetId + productMeta['subDatasetId'] = productMeta['datasetId'] + + if isLocal is True: + result = uploadLocalProductAndIngest(productMeta) + else: + remoteFilePath = "{}/{}".format(targetBasePath, productLocation) + productMeta['source'] = "/vsis3/{}".format(remoteFilePath) + result = ingestProduct(productMeta) + + if result['status'] is 'error': + logging.error(result['message']) + else: + logging.info(result['message']) + + logging.debug(result['response'].text) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Ingest a product in edav user data catalogue') + parser.add_argument('--title', help='A friendly name for the dataset', required=True) + parser.add_argument('--description', help='The dataset description', required=True) + parser.add_argument('--product_date', help='The product date as ISO string', required=True) + parser.add_argument('--dataset_id', default=None, help='Set the dataset id (default is to derive it from the file name)') + parser.add_argument('--is_local', action='store_true', help='A flag indicating if the product is local (default to false). When set the product will be first uploaded to S3 and then ingested') + parser.add_argument('product_location', help='The product location. 
In case of remote file (--is-local not set), it should be the relative path from the S3 user data mount point') + args = parser.parse_args() + + ingestUserProduct( + productLocation=args.product_location, + title=args.title, + description=args.description, + productDate=args.product_date, + datasetId=args.dataset_id, + isLocal=args.is_local + ) diff --git a/init.sh b/init.sh index 49df1bbd198e3f19c3c9bc4c9c56ef3bf2b83c06..c060754d53b8f59593d4237dea0071bdb54a72d6 100755 --- a/init.sh +++ b/init.sh @@ -17,3 +17,10 @@ echo "url_token = https://iam.${MAAP_ENV_TYPE,,}.esa-maap.org/oxauth/restv1/toke echo "client_id = $CLIENT_ID" >> $HOME/.maap/maap.ini echo "url_gravitee_s3 = https://gravitee-gateway.${MAAP_ENV_TYPE,,}.esa-maap.org/s3/" >> $HOME/.maap/maap.ini +#edav.ini +touch $HOME/.maap/auth.ini +echo "[edav]" > $HOME/.maap/edav.ini +echo "user_data_ingestion_url = https://edav-das-vap.${MAAP_ENV_TYPE,,}.esa-maap.org/loader/upload.json" >> $HOME/.maap/edav.ini +echo "user_data_remote_s3_path = maap-scientific-data/shared" >> $HOME/.maap/edav.ini +echo "user_data_local_s3_mount = /projects/s3-drive/user-data" >> $HOME/.maap/edav.ini +echo "user_data_upload_location = edav" >> $HOME/.maap/edav.ini \ No newline at end of file