This tutorial demonstrates how to use the ICA Python library packaged with the JupyterLab image for Bench Workspaces.
See the JupyterLab documentation for details about the JupyterLab docker image provided by Illumina.
The tutorial shows how authentication to the ICA API works and how to search, upload, download and delete data from a project in a Bench Workspace. The Python code snippets are written for compatibility with a Jupyter Notebook.
Python modules
Navigate to Bench > Workspaces and click Enable to enable workspaces. Select +New Workspace to create a new workspace. Fill in the required details and select JupyterLab for the Docker image. Click Save and Start to open the workspace. The following snippets of code can be pasted into the workspace you've created.
This snippet defines the required python modules for this tutorial:
# Wrapper modules
import icav2
from icav2.api import project_data_api
from icav2.model.problem import Problem
from icav2.model.project_data import ProjectData
# Helper modules
import random
import os
import requests
import string
import hashlib
import getpass
Authentication
This snippet shows how to authenticate using the following methods:
These snippets show how to manage data in a project. Operations shown are:
Create a Project Data API client instance
List all data in a project
Create a data element in a project
Upload a file to a data element in a project
Download a data element from a project
Search for matching data elements in a project
Delete matching data elements in a project
# The Bench workspace exposes the ID of its project through the
# ICA_PROJECT environment variable; a missing variable raises KeyError.
projectId = os.environ['ICA_PROJECT']

# Project Data API client built on the existing `apiClient` object;
# the data snippets below issue their calls through this instance.
projectDataApiInstance = project_data_api.ProjectDataApi(
    apiClient,
)
List Data
# List all data in a project, one page at a time.
# A new page is requested on every loop iteration; requesting only once
# would print the first page over and over.
# NOTE(review): page_offset is treated as the number of rows to skip, as
# described for offset-based paging in the ICA REST API reference --
# confirm against the API version you are using.
pageOffset = 0
pageSize = 30
try:
    while True:
        projectDataPagedList = projectDataApiInstance.get_project_data_list(
            project_id=projectId,
            page_size=str(pageSize),
            page_offset=str(pageOffset),
        )
        totalRecords = projectDataPagedList.total_item_count
        pageItems = projectDataPagedList.items or []
        for projectData in pageItems:
            print("Path: "+projectData.data.details.path + " - Type: "+projectData.data.details.data_type)
        # Advance by the number of rows actually received so the next request
        # starts right after the last printed element.
        pageOffset = pageOffset + len(pageItems)
        # Stop once everything was listed; the empty-page check guards against
        # looping forever if the reported total is never reached.
        if not pageItems or pageOffset >= totalRecords:
            break
except icav2.ApiException as e:
    print("Exception when calling ProjectDataAPIApi->get_project_data_list: %s\n" % e)
Create Data
# Register a new, empty FILE element named "test.txt" in the project.
# On success its ID is kept in `fileId` for the upload/download snippets.
createRequest = icav2.model.create_data.CreateData(
    name="test.txt",
    data_type="FILE",
)
try:
    projectData = projectDataApiInstance.create_data_in_project(
        projectId,
        create_data=createRequest,
    )
except icav2.ApiException as e:
    print("Exception when calling ProjectDataAPIApi->create_data_in_project: %s\n" % e)
else:
    # Only reached when the API call did not raise.
    fileId = projectData.data.id
Upload Data
## Upload a local file to a data element in a project
# Create a local file with random lowercase content in the Bench workspace
filename = '/tmp/' + ''.join(random.choice(string.ascii_lowercase) for _ in range(10)) + ".txt"
content = ''.join(random.choice(string.ascii_lowercase) for _ in range(100))
with open(filename, "w") as f:
    f.write(content)
try:
    # Read the file back once, as bytes, so that the hash and the uploaded
    # body are computed from exactly the same data and no handle stays open.
    with open(filename, 'rb') as f:
        data = f.read()
    # Calculate MD5 hash (optional); the download snippet compares against
    # localFileHash.
    localFileHash = md5Hash = hashlib.md5(data).hexdigest()
    # Get Upload URL
    upload = projectDataApiInstance.create_upload_url_for_data(project_id=projectId, data_id=fileId)
    # Upload dummy file
    r = requests.put(upload.url, data=data)
    # requests does not raise on HTTP error statuses, so report them
    # explicitly instead of silently continuing.
    if not r.ok:
        print("Error: upload failed with HTTP status %s" % r.status_code)
except icav2.ApiException as e:
    print("Exception when calling ProjectDataAPIApi->create_upload_url_for_data: %s\n" % e)
finally:
    # Delete local dummy file, even when the upload raised
    os.remove(filename)
Download Data
## Download a data element from a project
# Random local target path; chosen before the try block so the cleanup in
# `finally` can always refer to it.
filename = '/tmp/' + ''.join(random.choice(string.ascii_lowercase) for _ in range(10)) + ".txt"
try:
    # Get Download URL
    download = projectDataApiInstance.create_download_url_for_data(project_id=projectId, data_id=fileId)
    # Download file
    r = requests.get(download.url)
    if not r.ok:
        # requests does not raise on HTTP error statuses; report the failure
        # rather than hashing an error response body.
        print("Error: download failed with HTTP status %s" % r.status_code)
    else:
        with open(filename, 'wb') as f:
            f.write(r.content)
        # Verify md5 hash of what was written to disk against the hash
        # computed by the upload snippet (localFileHash).
        with open(filename, 'rb') as f:
            remoteFileHash = hashlib.md5(f.read()).hexdigest()
        if localFileHash != remoteFileHash:
            print("Error: MD5 mismatch")
except icav2.ApiException as e:
    print("Exception when calling ProjectDataAPIApi->create_download_url_for_data: %s\n" % e)
finally:
    # Delete local dummy file; it may not exist if the download failed early
    if os.path.exists(filename):
        os.remove(filename)
Search for Data
# Search for matching data elements in a project.
# The request passes full_text="test.txt"; every element returned is printed
# with its path, ID and data type.
try:
    projectDataPagedList = projectDataApiInstance.get_project_data_list(project_id=projectId, full_text="test.txt")
    for projectData in projectDataPagedList.items:
        # The second field is the element ID (data.id), so it is labelled "ID"
        # rather than "Name".
        print("Path: " + projectData.data.details.path + " - ID: " + projectData.data.id + " - Type: " + projectData.data.details.data_type)
except icav2.ApiException as e:
    print("Exception when calling ProjectDataAPIApi->get_project_data_list: %s\n" % e)
Delete Data
# Remove every data element returned by the "test.txt" search.
# NOTE(review): full_text presumably performs a text search rather than an
# exact-name match -- confirm before running this in a project that holds
# other, similarly named data.
try:
    searchResult = projectDataApiInstance.get_project_data_list(
        project_id=projectId,
        full_text="test.txt",
    )
    for match in searchResult.items:
        matchPath = match.data.details.path
        print("Deleting file " + matchPath)
        projectDataApiInstance.delete_data(
            project_id=projectId,
            data_id=match.data.id,
        )
except icav2.ApiException as e:
    print("Exception %s\n" % e)
Base Operations
These snippets show how to get a connection to a base database and run an example query. Operations shown are:
Create a Python connection to the database (using the Snowflake Python connector)
Create a table
Insert data into a table
Query the table
Delete the table
Snowflake Python API documentation can be found here
This snippet defines the required Python modules for this tutorial:
# API modules
import icav2
from icav2.api import project_base_api
from icav2.model.problem import Problem
from icav2.model.base_connection import BaseConnection
# Helper modules
import os
import requests
import getpass
import snowflake.connector
# The Bench workspace exposes the ID of its project through the
# ICA_PROJECT environment variable; a missing variable raises KeyError.
projectId = os.environ['ICA_PROJECT']

# Project Base API client built on the existing `apiClient` object;
# it is used below to request the Base connection details.
projectBaseApiInstance = project_base_api.ProjectBaseApi(
    apiClient,
)
Get Base Access Credentials
# Ask ICA for the Base connection details of this project (authenticator,
# access token, role name and warehouse name are read from the result below).
try:
    baseConnection = projectBaseApiInstance.create_base_connection_details(
        project_id=projectId,
    )
except icav2.ApiException as e:
    print("Exception when calling ProjectBaseAPIApi->create_base_connection_details: %s\n" % e)

## Open a connection with the Snowflake Python connector
# Account and database come from the workspace environment; the remaining
# settings come from the connection details fetched above.
connectArgs = {
    "account": os.environ["ICA_SNOWFLAKE_ACCOUNT"],
    "authenticator": baseConnection.authenticator,
    "token": baseConnection.access_token,
    "database": os.environ["ICA_SNOWFLAKE_DATABASE"],
    "role": baseConnection.role_name,
    "warehouse": baseConnection.warehouse_name,
}
ctx = snowflake.connector.connect(**connectArgs)

# Select the project database for the statements that follow.
useStatement = "USE " + os.environ["ICA_SNOWFLAKE_DATABASE"]
ctx.cursor().execute(useStatement)
Create a Table
## Create (or replace) the example table
# `tableName` is reused by the insert, query and drop snippets.
tableName = "test_table"
createStatement = "CREATE OR REPLACE TABLE " + tableName + "(col1 integer, col2 string)"
ctx.cursor().execute(createStatement)
Add Table Record
## Add two example rows to the table
# The statement is assembled from the same fragments as before and sent in a
# single execute() call.
insertStatement = "".join([
    "INSERT INTO ",
    tableName,
    "(col1, col2) VALUES ",
    " (123, 'test string1'), ",
    " (456, 'test string2')",
])
ctx.cursor().execute(insertStatement)
Query Table
## Read back every row of the table and print it as "col1, col2"
cursor = ctx.cursor()
try:
    selectStatement = "SELECT * FROM " + tableName
    cursor.execute(selectStatement)
    # Each row is unpacked into exactly two values, one per table column.
    for first, second in cursor:
        line = '{0}, {1}'.format(first, second)
        print(line)
finally:
    # Close the cursor whether or not the query succeeded.
    cursor.close()
Delete Table
# Drop the example table again so the tutorial leaves nothing behind
dropStatement = "DROP TABLE " + tableName
ctx.cursor().execute(dropStatement)