Skip to content
Snippets Groups Projects
maap-s3.py 12.8 KiB
Newer Older
edupin's avatar
edupin committed
#!/usr/local/bin/python
import sys, getopt
import requests
import json
import sys
import os
import time
import math
import os.path as path
# Import getopt module
import getopt


# Environment name read from the process environment; it is lower-cased and
# embedded in the IAM and gateway host names built by the functions below.
# NOTE(review): os.getenv returns None when the variable is unset, in which
# case the `.lower()` calls below will raise AttributeError.
MAAP_ENV_TYPE = os.getenv("MAAP_ENV_TYPE")
# OAuth client id sent as `client_id` in the token request.
CLIENT_ID = os.getenv("CLIENT_ID")
# NOTE(review): BEARER is not referenced anywhere in the visible code.
BEARER=""
# JSON file in which the email, password and token are cached between runs.
USER_INFO_FILE_PATH="/usr/bmap/maap-s3-userinfo.json"
# NOTE(review): the functions below assign their own local `userinfo`; this
# module-level dict does not appear to be read in the visible code.
userinfo = {}

def display_help():
    """Print the command-line usage summary, then exit with status 2."""
    usage_lines = (
        'Usage: [option...] {-f|-u|-d|-l|-r}',
        '-u   myFile.tiff locally          path/myFile.tiff in the S3         Upload data in the S3',
        '-d   path/in/S3/file.tiff  myFileName.tiff                           Download a data from the S3',
        '-l   folder/path                                                 List data in a subfolder',
        '-r   path/in/S3/file.tiff                                            Delete an existing data on S3',
        '-f                                                                   refresh credentials and password',
    )
    for usage_line in usage_lines:
        print(usage_line)
    # Showing the help always terminates the script.
    sys.exit(2)


#########################
# Init the bearer       #
#########################
def init():
    """Make sure the user info file exists and holds a recent token.

    - No user info file: ask the user for credentials through refresh().
    - File older than one hour: reuse the stored email/password to request
      a new token through generate_token().
    - Otherwise: nothing to do, the cached token is considered valid.
    """
    if not os.path.isfile(USER_INFO_FILE_PATH):
        print("[INFO] Personal user info is not found")
        refresh()
        return

    print("[INFO] Personal user info is find")

    # The file is rewritten each time a token is generated, so its
    # modification time is used as the token's age.
    if not is_file_older_than_x_hour(USER_INFO_FILE_PATH):
        print("[INFO] Token is still valid")
        return

    print("[INFO] Token is expired, we generate a new one")
    with open(USER_INFO_FILE_PATH) as cache_file:
        cached = json.load(cache_file)
    generate_token(cached['email'], cached['password'])


###########################
# Refresh token and save #
###########################
def refresh():
    """Prompt for the user's email and password, then generate and cache a token.

    The password is read with getpass so that it is not echoed on the
    terminal while being typed.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    import getpass

    email = input("Your email: ")
    password = getpass.getpass("Your password: ")
    # Request a new token and store it together with the credentials.
    generate_token(email, password)


###########################
# Generate token and save #
###########################
def generate_token(email, password):
    """Request an access token from the IAM service and cache it on disk.

    Posts a password-grant request to the token endpoint of the environment
    named by MAAP_ENV_TYPE. On success the email, password and token are
    written as JSON to USER_INFO_FILE_PATH.

    Args:
        email: user name sent as ``username``.
        password: password sent as ``password``.

    Exits:
        Calls ``sys.exit(2)`` when the reply carries no access token
        (rejected credentials, error payload, or a non-JSON body).
    """
    print("[INFO] Start retrieving token for authent")
    url = "https://iam."+MAAP_ENV_TYPE.lower()+".esa-maap.org/oxauth/restv1/token"
    print (url)
    print (CLIENT_ID)
    response = requests.post(url, data={'client_id': CLIENT_ID, 'username': email, 'password': password, "grant_type": "password", "scope": "openid+profile"})
    print(response)

    # A failed login returns an error document (or no JSON at all) without
    # 'access_token'; treat every such case as "no token" instead of crashing.
    try:
        data = json.loads(response.text)
    except ValueError:
        data = {}
    token = data.get('access_token') if isinstance(data, dict) else None

    if not token:
        print("[ERROR] Token is empty. Please 1) run refresh (-f) function and check your password")
        # Terminate the script
        sys.exit(2)

    # NOTE(review): the password is stored in clear text next to the token so
    # that init() can renew the token later without prompting again.
    userinfo = {
        'email': email,
        'password': password,
        'token': token
    }
    with open(USER_INFO_FILE_PATH, 'w') as outfile:
        json.dump(userinfo, outfile)

    print("[INFO] Token saved for one hour and ready to be used "+token)


#########################
# Check if file is older#
#########################
def is_file_older_than_x_hour(file, hour=1):
    """Tell whether *file* was last modified more than *hour* hours ago.

    Args:
        file: path of the file to inspect.
        hour: age limit in hours (defaults to 1).

    Returns:
        True when the time elapsed since the file's last modification is
        strictly greater than ``hour`` hours, False otherwise.
    """
    modified_at = path.getmtime(file)
    elapsed_seconds = time.time() - modified_at
    limit_seconds = 3600*hour
    return elapsed_seconds > limit_seconds

#########################
# Upload the data in S3 #
#########################
def upload(sourceFile, destination):
    """Upload a local file to the S3 storage behind the gateway.

    Files larger than 5,000,000,000 bytes are handed to upload_multipart().
    Smaller files are sent with one PUT to a presigned URL, which the gateway
    returns in the ``Location`` header of a non-followed redirect.

    Args:
        sourceFile: path of the local file to send.
        destination: object key (path) to create in S3.

    Shows the help (and exits) when either argument is empty; exits with
    status 2 when the source file does not exist.
    """
    print("[INFO] Source file is : ", sourceFile)
    print("[INFO] Destination file is : ", destination)

    if not (sourceFile and destination):
        display_help()
        return

    # Fail early, before any credential prompt, when there is nothing to send.
    if not os.path.isfile(sourceFile):
        print("[ERROR] Source file not found : ", sourceFile)
        sys.exit(2)

    print("[INFO] Get an existing or fresh token")
    init()

    fileSize = os.stat(sourceFile).st_size
    print("Size "+ str(fileSize))

    # Above 5 GB the file is sent part by part.
    if fileSize > 5000000000:
        print("[INFO] Starting multi part upload")
        upload_multipart(sourceFile, destination)
        return

    with open(USER_INFO_FILE_PATH) as json_file:
        token = json.load(json_file)['token']

    print("[INFO] Starting retrieving the presigned url for the creation of the file with token "+ token)
    url = "https://gravitee-gateway."+MAAP_ENV_TYPE.lower()+".esa-maap.org/s3/"+destination

    # Redirects are not followed: the presigned URL is read from the header
    # and the file is then PUT to it without the Authorization header.
    response = requests.put(url, headers = {'Authorization': 'Bearer '+token}, allow_redirects=False)
    # .get(): a rejected request carries no Location header at all.
    location = response.headers.get('Location')

    if not location:
        print("[ERROR] Presigned url not generated. Please re run refresh or contact admin if the error persist")
        return

    print("[INFO] Location is "+ location)
    print("[INFO] Start uploading the file")
    with open(sourceFile, 'rb') as f:
        response = requests.put(location, data=f)
        print(response)



###########################################################
# Upload the data in S3, the data is split chunk by chunk #
###########################################################
def _multipart_part_size(file_size, min_part_size=5 * 1024 * 1024, max_parts=10000):
    """Return the part size in bytes: min_part_size, grown so that file_size fits in max_parts parts."""
    # Integer ceiling division keeps the result exact for very large files.
    return max(min_part_size, (file_size + max_parts - 1) // max_parts)


def _parse_presigned_urls(text):
    """Extract the list of presigned URLs from the gateway's response body."""
    try:
        parsed = json.loads(text)
    except ValueError:
        parsed = None
    if isinstance(parsed, list) and all(isinstance(item, str) for item in parsed):
        return parsed
    # Fallback for a body that is not a JSON array of strings: strip the
    # brackets and quotes by hand and split on commas.
    flattened = text.replace(']', '').replace('[', '').replace('"', '')
    return [item.strip() for item in flattened.split(",")]


def upload_multipart(sourceFile, destination):
    """Upload a large file to S3 part by part through presigned URLs.

    Steps: ask the gateway for an upload id, ask for one presigned URL per
    part, PUT every part while collecting the returned ETags, then ask the
    gateway to complete the multipart upload.

    Parts are 5 MiB each unless that would need more than 10,000 parts, in
    which case the part size is increased to stay within that count.

    Args:
        sourceFile: path of the local file to send.
        destination: object key (path) to create in S3.

    Exits:
        Calls ``sys.exit(2)`` when the gateway returns fewer URLs than parts
        or when a part upload returns no ETag.
    """
    with open(USER_INFO_FILE_PATH) as json_file:
        token = json.load(json_file)['token']
    auth_headers = {'Authorization': 'Bearer '+token}
    base_url = "https://gravitee-gateway."+MAAP_ENV_TYPE.lower()+".esa-maap.org/s3/"

    fileSize = os.stat(sourceFile).st_size
    print("Size "+ str(fileSize))
    part_size = _multipart_part_size(fileSize)
    nbParts = (fileSize + part_size - 1) // part_size
    print("[INFO] We will have "+ str(nbParts)+" parts")

    # Step 1: open the multipart upload and keep its id.
    params = {'bucketName': 'bmap-catalogue-data', 'objectKey': destination}
    response = requests.get(base_url+"generateUploadId", params = params, headers = auth_headers)
    print("[INFO] uploadId "+ response.text)
    uploadId = response.text

    # Step 2: one presigned URL per part.
    params = {'bucketName': 'bmap-catalogue-data', 'objectKey': destination, 'nbParts': nbParts, 'uploadId': uploadId}
    response = requests.get(base_url+"generateListPresignedUrls", params = params, headers = auth_headers)
    listPresignedUrl = _parse_presigned_urls(response.text)
    print(listPresignedUrl)

    if len(listPresignedUrl) < nbParts:
        print("[ERROR] Expected "+ str(nbParts)+" presigned urls but received "+ str(len(listPresignedUrl)))
        sys.exit(2)

    # Step 3: send the parts in order; part numbers are 1-based.
    parts = []
    with open(sourceFile, 'rb') as f:
        for index, part_url in enumerate(listPresignedUrl[:nbParts]):
            print("Upload part "+ str(index))
            file_data = f.read(part_size)
            # The last part is usually shorter than part_size, so the length
            # is taken from the bytes actually read.
            headers = {'Content-Length': str(len(file_data))}
            response = requests.put(part_url, data=file_data, headers=headers)
            etag = response.headers.get('ETag')
            if not etag:
                print("[ERROR] Upload of part "+ str(index+1)+" failed with status "+ str(response.status_code))
                sys.exit(2)
            parts.append({'eTag': etag, 'partNumber': index+1})
            print(parts)

    # Step 4: complete the upload.
    # NOTE(review): the part list is sent as Python's str() of the list (not
    # strict JSON) in the body of a GET, as before — confirm what format the
    # gateway expects before changing it.
    response = requests.get(base_url+"completeMultiPartUploadRequest", data=str(parts),  params = params, headers = auth_headers)
    print(response)


###################
# Delete the data #
####################
def delete(destination):
    """Delete an object from the S3 storage behind the gateway.

    The gateway answers the authenticated DELETE with a redirect whose
    ``Location`` header is a presigned URL; the object is removed by sending
    a second DELETE to that URL.

    Args:
        destination: object key (path) to delete in S3.

    Shows the help (and exits) when *destination* is empty.
    """
    print("[INFO] Destination file is : ", destination)

    if not destination:
        display_help()
        return

    print("[INFO] Get an existing or fresh token")
    init()
    with open(USER_INFO_FILE_PATH) as json_file:
        token = json.load(json_file)['token']

    url = "https://gravitee-gateway."+MAAP_ENV_TYPE.lower()+".esa-maap.org/s3/"+destination

    # Redirects are not followed so the presigned URL can be read from the header.
    response = requests.delete(url, headers = {'Authorization': 'Bearer '+token}, allow_redirects=False)
    # .get(): a rejected request carries no Location header at all.
    location = response.headers.get('Location')

    if not location:
        print("[ERROR] Presigned url not generated. Please re run refresh or contact admin if the error persist")
        return

    print("[INFO] we are about to delete")
    response = requests.delete(location)
    print(response)




###################
# download the data #
####################
def download(path, name):
    """Download an object from the S3 storage into a local file.

    The gateway answers the authenticated GET with a redirect whose
    ``Location`` header is a presigned URL. The object is then streamed from
    that URL to *name* in chunks, so large objects are never held entirely in
    memory.

    Args:
        path: object key (path) in S3.
        name: local file name to write.

    Shows the help (and exits) when *path* is empty. Nothing is written when
    no presigned URL is returned or when the download answers with an HTTP
    error status.
    """
    print("[INFO] path file is : ", path)

    if not path:
        display_help()
        return

    print("[INFO] Get an existing or fresh token")
    init()
    with open(USER_INFO_FILE_PATH) as json_file:
        token = json.load(json_file)['token']

    url = "https://gravitee-gateway."+MAAP_ENV_TYPE.lower()+".esa-maap.org/s3/"+path

    # Redirects are not followed so the presigned URL can be read from the header.
    response = requests.get(url, headers = {'Authorization': 'Bearer '+token}, allow_redirects=False)
    # .get(): a rejected request carries no Location header at all.
    location = response.headers.get('Location')

    if not location:
        print("[ERROR] Presigned url not generated. Please re run refresh or contact admin if the error persist")
        return

    print("[INFO] we are about to download the data")
    response = requests.get(location, stream=True)
    try:
        # Do not save an error document under the requested file name.
        if not response.ok:
            print("[ERROR] Download failed with status "+ str(response.status_code))
            return
        with open(name, 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    out_file.write(chunk)
    finally:
        # Release the connection even when writing fails.
        response.close()
    print("[INFO] Download finished")




##########################
# list data in s3 folder #
##########################
def list(path):
    """Print the content of an S3 folder as returned by the gateway.

    Sends an authenticated GET on ``<path>?list=true`` (redirects are not
    followed) and prints the response body, or a "no data" message when the
    body is empty.

    Args:
        path: folder path in S3.

    Shows the help (and exits) when *path* is empty.
    """
    print("[INFO]: Start finding data in path : "+path)

    if not path:
        display_help()
        return

    print("[INFO] Get an existing or fresh token")
    # Make sure a token is cached, then read it back from the user info file.
    init()
    with open(USER_INFO_FILE_PATH) as json_file:
        cached = json.load(json_file)
        token = cached['token']

    listing_url = "https://gravitee-gateway."+MAAP_ENV_TYPE.lower()+".esa-maap.org/s3/"+path+"?list=true"
    auth_headers = {'Authorization': 'Bearer '+token}

    response = requests.get(listing_url, headers = auth_headers, allow_redirects=False)
    print("[INFO] Result list:")
    body = response.text
    print(body if body else "[INFO] No data found")

def main(argv=None):
    """Parse the command line and run the requested S3 operation.

    Options:
        -u/--upload SRC DEST    upload a local file
        -d/--download KEY NAME  download an object to a local file
        -l/--list FOLDER        list a folder
        -r/--remove KEY         delete an object
        -i/--init               make sure a valid token is cached
        -f/--refresh            ask for credentials and renew the token
        -h/--help               show the usage

    Args:
        argv: argument list without the script name; defaults to
            ``sys.argv[1:]``.

    Exits with status 2 on an unknown option, on a wrong number of
    arguments, or when no option is given.
    """
    if argv is None:
        # Store argument variable omitting the script name
        argv = sys.argv[1:]

    try:
        # -i, -f and -h are plain flags: they must not require a value,
        # otherwise a bare "-f" is rejected by getopt.
        options, args = getopt.getopt(
            argv,
            'u:r:l:d:ifh',
            ['upload=', 'remove=', 'list=', 'download=', 'init', 'refresh', 'help'],
        )
    except getopt.GetoptError:
        # Print the error message if the wrong option is provided
        print('The wrong option is provided. Please run -h')
        # Terminate the script
        sys.exit(2)

    if not options:
        # Nothing to do: show the usage rather than exiting silently.
        display_help()

    # `arg` is the option's own value; `args` holds the remaining positionals.
    for opt, arg in options:
        if opt in ('-u', '--upload'):
            # Upload needs the source (option value) plus one destination.
            if len(args) != 1:
                display_help()
            else:
                upload(arg, args[0])
        elif opt in ('-r', '--remove'):
            # Delete takes the object key only.
            if args:
                display_help()
            else:
                delete(arg)
        elif opt in ('-l', '--list'):
            # List takes the folder path only.
            if args:
                display_help()
            else:
                list(arg)
        elif opt in ('-d', '--download'):
            # Download needs the object key (option value) plus a local name.
            if len(args) != 1:
                display_help()
            else:
                download(arg, args[0])
        elif opt in ('-i', '--init'):
            # Make sure a token is available
            init()
        elif opt in ('-f', '--refresh'):
            # Ask for credentials again and renew the token
            refresh()
        elif opt in ('-h', '--help'):
            # Print the usage
            display_help()


if __name__ == "__main__":
    main()