# Image providing FastQC 0.11.9 (Java 8 + Perl launcher script) on CentOS 7.
# NOTE(review): centos:7 has reached end of life; confirm that the yum
# repositories used below are still reachable from the build environment.
FROM centos:7
WORKDIR /usr/local
# DEPENDENCIES
# ca-certificates is installed explicitly so that the HTTPS download below is
# validated against an up-to-date trust store instead of skipping validation.
# The yum cache is removed in the same layer so it never lands in the image.
RUN yum -y install \
        ca-certificates \
        java-1.8.0-openjdk \
        perl \
        unzip \
        wget && \
    yum clean all && \
    rm -rf /var/cache/yum
# INSTALLATION fastqc
# Download over HTTPS with certificate checking enabled (the previous plain-HTTP
# fetch with --no-check-certificate accepted an unauthenticated archive).
# The zip unpacks to /usr/local/FastQC; the archive is deleted in the same layer.
RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.9.zip && \
    unzip fastqc_v0.11.9.zip && \
    chmod a+rx /usr/local/FastQC/fastqc && \
    rm -f fastqc_v0.11.9.zip
# Adding FastQC to the PATH
ENV PATH=$PATH:/usr/local/FastQC
# DEFAULTS
ENV LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8
# The entrypoint is cleared on purpose: the caller supplies the full command.
ENTRYPOINT []
## how to build the docker image
## docker build --file fastqc-0.11.9.Dockerfile --tag fastqc-0.11.9:0 .
## docker run --rm -i -t --entrypoint /bin/bash fastqc-0.11.9:0
docker build --file fastqc-0.11.9.Dockerfile --tag fastqc-0.11.9:1 .
docker images
docker run --rm -i -t --entrypoint /bin/bash fastqc-0.11.9:1
docker save fastqc-0.11.9:1 -o fastqc-0.11.9:1.tar.gz
#!/usr/bin/env cwl-runner
# (Re)generated by BlueBee Platform
# CWL tool wrapper around `fastqc`: takes one mandatory and one optional FASTQ
# file and collects every HTML report and ZIP archive written to the output
# directory.
$namespaces:
  ilmn-tes: http://platform.illumina.com/rdf/iap/
cwlVersion: cwl:v1.0
class: CommandLineTool
label: FastQC
doc: >-
  FastQC aims to provide a simple way to do some quality control checks on raw
  sequence data coming from high throughput sequencing pipelines.
baseCommand: fastqc
arguments:
  # Fixed thread count passed as `-t 2`.
  - prefix: -t
    valueFrom: "2"
    position: 1
  # Reports are written into the runner-provided output directory.
  - prefix: -o
    valueFrom: $(runtime.outdir)
    position: 4
inputs:
  Fastq1:
    type: File
    inputBinding:
      position: 1
  # Optional second FASTQ file.
  Fastq2:
    type: File?
    inputBinding:
      position: 3
outputs:
  HTML:
    type: File[]
    outputBinding:
      glob: "*.html"
  Zip:
    type: File[]
    outputBinding:
      glob: "*.zip"

# Start of the next snippet in this concatenated file:
#!/usr/bin/env cwltool
cwlVersion: v1.0
class: CommandLineTool
# Converts a FASTQ file to FASTA with awk. Within every group of four lines,
# the first line is printed with its first character replaced by ">" and the
# second line is printed unchanged; the other two lines are dropped.
# The result is captured from stdout into test.fasta.
baseCommand: awk
arguments:
  - 'NR%4 == 1 {print ">" substr($0, 2)}NR%4 == 2 {print}'
stdout: test.fasta
inputs:
  inputFastq:
    type: File
    inputBinding:
      position: 1
outputs:
  outputFasta:
    type: File
    streamable: true
    outputBinding:
      glob: test.fasta

# Start of the next snippet in this concatenated file:
#!/usr/bin/env cwltool
cwlVersion: v1.0
class: CommandLineTool
# Runs `wc -l` on the given file and captures stdout into lineCount.tsv.
baseCommand:
  - wc
  - -l
stdout: lineCount.tsv
inputs:
  inputFasta:
    type: File
    inputBinding:
      position: 1
outputs:
  outputCount:
    type: File
    streamable: true
    outputBinding:
      glob: lineCount.tsv

# Start of the next snippet in this concatenated file (workflow definition):
cwlVersion: v1.0
class: Workflow
# Two-step workflow: `convert` runs tool-fqTOfa.cwl on the input FASTQ, and
# `count` runs tool-countLines.cwl on the FASTA produced by `convert`.
# Both intermediate and final files are exposed as workflow outputs.
inputs:
  ipFQ:
    type: File
outputs:
  fqTOfaOut:
    type: File
    outputSource: convert/outputFasta
  count_out:
    type: File
    outputSource: count/outputCount
steps:
  convert:
    run: tool-fqTOfa.cwl
    in:
      inputFastq: ipFQ
    out:
      - outputFasta
  count:
    run: tool-countLines.cwl
    in:
      inputFasta: convert/outputFasta
    out:
      - outputCount

# Start of the next snippet in this concatenated file:
requirements:
- class: DockerRequirement
  dockerPull: ubuntu:latest
requirements:
- class: DockerRequirement
  dockerPull: 079623148045.dkr.ecr.eu-central-1.amazonaws.com/cp-prod/XXXXXXXXXX:latest
% icav2 projects create basic-cli-tutorial --region c39b1feb-3e94-4440-805e-45e0c76462bf
% icav2 projects enter basic-cli-tutorial
% icav2 projects list
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<pd:pipeline xmlns:pd="xsd://www.illumina.com/ica/cp/pipelinedefinition" code="" version="1.0">
<pd:dataInputs>
<pd:dataInput code="ipFQ" format="FASTQ" type="FILE" required="true" multiValue="false">
<pd:label>ipFQ</pd:label>
<pd:description></pd:description>
</pd:dataInput>
</pd:dataInputs>
<pd:steps/>
</pd:pipeline>
% icav2 projectpipelines create cwl cli-tutorial --workflow workflow.cwl --tool tool-fqTOfa.cwl --tool tool-countLines.cwl --parameter parameters.xml --storage-size small --description "cli tutorial pipeline"
% icav2 projectpipelines list
ID CODE DESCRIPTION
6779fa3b-e2bc-42cb-8396-32acee8b6338 cli-tutorial cli tutorial pipeline
@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
AAGTTACCCTTAACAACTTAAGGGTTTTCAAATAGA
+SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
IIIIIIIIIIIIIIIIIIIIDIIIIIII>IIIIII/
@SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=36
AGCAGAAGTCGATGATAATACGCGTCGTTTTATCAT
+SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=36
IIIIIIIIIIIIIIIIIIIIIIGII>IIIII-I)8I
@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
AAGTTACCCTTAACAACTTAAGGGTTTTCAAATAGA
+SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
IIIIIIIIIIIIIIIIIIIIDIIIIIII>IIIIII/
@SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=36
AGCAGAAGTCGATGATAATACGCGTCGTTTTATCAT
+SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=36
IIIIIIIIIIIIIIIIIIIIIIGII>IIIII-I)8I
@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
AAGTTACCCTTAACAACTTAAGGGTTTTCAAATAGA
+SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
IIIIIIIIIIIIIIIIIIIIDIIIIIII>IIIIII/
@SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=36
AGCAGAAGTCGATGATAATACGCGTCGTTTTATCAT
+SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=36
IIIIIIIIIIIIIIIIIIIIIIGII>IIIII-I)8I
% icav2 projectdata upload test.fastq /
oldFilename= test.fastq en newFilename= test.fastq
bucket= stratus-gds-use1 prefix= 0a488bb2-578b-404a-e09d-08d9e3343b2b/test.fastq
Using: 1 workers to upload 1 files
15:23:32: [0] Uploading /Users/user1/Documents/icav2_validation/for_tutorial/working/test.fastq
15:23:33: [0] Uploaded /Users/user1/Documents/icav2_validation/for_tutorial/working/test.fastq to /test.fastq in 794.511591ms
Finished uploading 1 files in 795.244677ms
% icav2 projectdata list
PATH NAME TYPE STATUS ID OWNER
/test.fastq test.fastq FILE AVAILABLE fil.c23246bd7692499724fe08da020b1014 4b197387-e692-4a78-9304-c7f73ad75e44
% icav2 projectpipelines start cwl cli-tutorial --type-input STRUCTURED --input ipFQ:fil.c23246bd7692499724fe08da020b1014 --user-reference tut-test
analysisStorage.description 1.2 TB
analysisStorage.id 6e1b6c8f-f913-48b2-9bd0-7fc13eda0fd0
analysisStorage.name Small
analysisStorage.ownerId 8ec463f6-1acb-341b-b321-043c39d8716a
analysisStorage.tenantId f91bb1a0-c55f-4bce-8014-b2e60c0ec7d3
analysisStorage.tenantName ica-cp-admin
analysisStorage.timeCreated 2021-11-05T10:28:20Z
analysisStorage.timeModified 2021-11-05T10:28:20Z
id 461d3924-52a8-45ef-ab62-8b2a29621021
ownerId 7fa2b641-1db4-3f81-866a-8003aa9e0818
pipeline.analysisStorage.description 1.2 TB
pipeline.analysisStorage.id 6e1b6c8f-f913-48b2-9bd0-7fc13eda0fd0
pipeline.analysisStorage.name Small
pipeline.analysisStorage.ownerId 8ec463f6-1acb-341b-b321-043c39d8716a
pipeline.analysisStorage.tenantId f91bb1a0-c55f-4bce-8014-b2e60c0ec7d3
pipeline.analysisStorage.tenantName ica-cp-admin
pipeline.analysisStorage.timeCreated 2021-11-05T10:28:20Z
pipeline.analysisStorage.timeModified 2021-11-05T10:28:20Z
pipeline.code cli-tutorial
pipeline.description Test, prepared parameters file from working GUI
pipeline.id 6779fa3b-e2bc-42cb-8396-32acee8b6338
pipeline.language CWL
pipeline.ownerId 7fa2b641-1db4-3f81-866a-8003aa9e0818
pipeline.tenantId d0696494-6a7b-4c81-804d-87bda2d47279
pipeline.tenantName icav2-entprod
pipeline.timeCreated 2022-03-10T13:13:05Z
pipeline.timeModified 2022-03-10T13:13:05Z
reference tut-test-cli-tutorial-eda7ee7a-8c65-4c0f-bed4-f6c2d21119e6
status REQUESTED
summary
tenantId d0696494-6a7b-4c81-804d-87bda2d47279
tenantName icav2-entprod
timeCreated 2022-03-10T20:42:42Z
timeModified 2022-03-10T20:42:43Z
userReference tut-test
% icav2 projectanalyses get 461d3924-52a8-45ef-ab62-8b2a29621021
analysisStorage.description 1.2 TB
analysisStorage.id 6e1b6c8f-f913-48b2-9bd0-7fc13eda0fd0
analysisStorage.name Small
analysisStorage.ownerId 8ec463f6-1acb-341b-b321-043c39d8716a
analysisStorage.tenantId f91bb1a0-c55f-4bce-8014-b2e60c0ec7d3
analysisStorage.tenantName ica-cp-admin
analysisStorage.timeCreated 2021-11-05T10:28:20Z
analysisStorage.timeModified 2021-11-05T10:28:20Z
endDate 2022-03-10T21:00:33Z
id 461d3924-52a8-45ef-ab62-8b2a29621021
ownerId 7fa2b641-1db4-3f81-866a-8003aa9e0818
pipeline.analysisStorage.description 1.2 TB
pipeline.analysisStorage.id 6e1b6c8f-f913-48b2-9bd0-7fc13eda0fd0
pipeline.analysisStorage.name Small
pipeline.analysisStorage.ownerId 8ec463f6-1acb-341b-b321-043c39d8716a
pipeline.analysisStorage.tenantId f91bb1a0-c55f-4bce-8014-b2e60c0ec7d3
pipeline.analysisStorage.tenantName ica-cp-admin
pipeline.analysisStorage.timeCreated 2021-11-05T10:28:20Z
pipeline.analysisStorage.timeModified 2021-11-05T10:28:20Z
pipeline.code cli-tutorial
pipeline.description Test, prepared parameters file from working GUI
pipeline.id 6779fa3b-e2bc-42cb-8396-32acee8b6338
pipeline.language CWL
pipeline.ownerId 7fa2b641-1db4-3f81-866a-8003aa9e0818
pipeline.tenantId d0696494-6a7b-4c81-804d-87bda2d47279
pipeline.tenantName icav2-entprod
pipeline.timeCreated 2022-03-10T13:13:05Z
pipeline.timeModified 2022-03-10T13:13:05Z
reference tut-test-cli-tutorial-eda7ee7a-8c65-4c0f-bed4-f6c2d21119e6
startDate 2022-03-10T20:42:42Z
status SUCCEEDED
summary
tenantId d0696494-6a7b-4c81-804d-87bda2d47279
tenantName icav2-entprod
timeCreated 2022-03-10T20:42:42Z
timeModified 2022-03-10T21:00:33Z
userReference tut-test
% icav2 projectpipelines start cwl cli-tutorial --data-id fil.c23246bd7692499724fe08da020b1014 --input-json '{
"ipFQ": {
"class": "File",
"path": "test.fastq"
}
}' --type-input JSON --user-reference tut-test-json













#!/usr/bin/env cwl-runner
# CWL wrapper around `fastp` for single-end (fastq1 only) or paired-end
# (fastq1 + fastq2) reads. Trimmed reads are written to
# <input nameroot>.fastp.fastq; the fastp.html / fastp.json reports are
# collected as outputs.
cwlVersion: v1.0
class: CommandLineTool
requirements:
  # Needed for the ${...} expressions used for the optional second read file.
  - class: InlineJavascriptRequirement
label: fastp
doc: Modified from https://github.com/nigyta/bact_genome/blob/master/cwl/tool/fastp/fastp.cwl
inputs:
  fastq1:
    type: File
    inputBinding:
      prefix: -i
  # Optional second read file; when absent no -I / -O options are emitted.
  fastq2:
    type:
      - File
      - 'null'
    inputBinding:
      prefix: -I
  threads:
    type:
      - int
      - 'null'
    default: 1
    inputBinding:
      prefix: --thread
  qualified_phred_quality:
    type:
      - int
      - 'null'
    default: 20
    inputBinding:
      prefix: --qualified_quality_phred
  # NOTE(review): despite its name this input is bound to
  # --unqualified_percent_limit (a percentage, not a phred score). The id is
  # kept unchanged because callers reference it; confirm the intended meaning.
  unqualified_phred_quality:
    type:
      - int
      - 'null'
    default: 20
    inputBinding:
      prefix: --unqualified_percent_limit
  min_length_required:
    type:
      - int
      - 'null'
    default: 50
    inputBinding:
      prefix: --length_required
  # NOTE(review): disable_trim_poly_g defaults to true, so setting this flag
  # without overriding that default passes both --trim_poly_g and
  # --disable_trim_poly_g; verify how fastp resolves the combination.
  force_polyg_tail_trimming:
    type:
      - boolean
      - 'null'
    inputBinding:
      prefix: --trim_poly_g
  disable_trim_poly_g:
    type:
      - boolean
      - 'null'
    default: true
    inputBinding:
      prefix: --disable_trim_poly_g
  base_correction:
    type:
      - boolean
      - 'null'
    default: true
    inputBinding:
      prefix: --correction
outputs:
  out_fastq1:
    type: File
    outputBinding:
      glob:
        - $(inputs.fastq1.nameroot).fastp.fastq
  out_fastq2:
    type:
      - File
      - 'null'
    outputBinding:
      # Guarded: the previous `$(inputs.fastq2.nameroot)` dereferenced null
      # when fastq2 was not supplied. An empty pattern list matches nothing,
      # which yields null for this optional output.
      glob: |
        ${
          if (inputs.fastq2) {
            return inputs.fastq2.nameroot + ".fastp.fastq";
          }
          return [];
        }
  html_report:
    type: File
    outputBinding:
      glob:
        - fastp.html
  json_report:
    type: File
    outputBinding:
      glob:
        - fastp.json
arguments:
  - prefix: -o
    valueFrom: $(inputs.fastq1.nameroot).fastp.fastq
  # Second output file, only for paired-end input. Returning null makes the
  # runner add nothing (neither the prefix nor a value); the previous form
  # returned '' twice, which put two empty-string arguments on the command line.
  - prefix: -O
    valueFrom: |
      ${
        if (inputs.fastq2) {
          return inputs.fastq2.nameroot + ".fastp.fastq";
        }
        return null;
      }
baseCommand:
  - fastp

# Start of the next snippet in this concatenated file:
#!/usr/bin/env cwl-runner
cwlVersion: cwl:v1.0
class: CommandLineTool
label: MultiQC
doc: >-
  MultiQC is a tool to create a single report with interactive plots for multiple
  bioinformatics analyses across many samples.
# Command line layout: multiqc -n <report_name> [files...] [directories...]
baseCommand: multiqc
inputs:
  report_name:
    type: string
    default: multiqc_report.html
    doc: Name of output report, without path but with full file name (e.g. report.html).
    inputBinding:
      prefix: -n
      position: 1
  files:
    type: File[]?
    doc: Files containing the result of quality analysis.
    inputBinding:
      position: 2
  directories:
    type: Directory[]?
    doc: Directories containing the result of quality analysis.
    inputBinding:
      position: 3
outputs:
  # Collected by extension rather than by report_name.
  report:
    type: File
    outputBinding:
      glob: "*.html"

# Start of the next snippet in this concatenated file:
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
label: spread_items
# Splits a flat list of files into two arrays. The staged helper script
# receives the files as one comma-separated `-i` value, sorts them by base
# name, puts names containing `_R1_` in the left list and `_R2_` in the right
# list, asserts both lists have the same length, and writes each list as JSON
# File objects. The outputs are produced by parsing those JSON files.
baseCommand:
  - python3
  - spread_script.py
requirements:
  - class: InlineJavascriptRequirement
  - class: InitialWorkDirRequirement
    listing:
      - entryname: spread_script.py
        writable: false
        entry: "import argparse\nimport os\nimport json\n\n# Create argument parser\n\
          parser = argparse.ArgumentParser()\nparser.add_argument(\"-i\", \"--inputFiles\"\
          , type=str, required=True, help=\"Input files\")\n\n# Parse the arguments\n\
          args = parser.parse_args()\n\n# Split the inputFiles string into a list of file\
          \ paths\ninput_files = args.inputFiles.split(',')\n\n# Sort the input files\
          \ by the base filename\ninput_files = sorted(input_files, key=lambda x: os.path.basename(x))\n\
          \n\n# Separate the files into left and right arrays, preserving the order\n\
          left_files = [file for file in input_files if '_R1_' in os.path.basename(file)]\n\
          right_files = [file for file in input_files if '_R2_' in os.path.basename(file)]\n\
          \n# Print the left files for debugging\nprint(\"Left files:\", left_files)\n\
          \n# Print the left files for debugging\nprint(\"Right files:\", right_files)\n\
          \n# Ensure left and right files are matched\nassert len(left_files) == len(right_files),\
          \ \"Mismatch in number of left and right files\"\n\n \n# Write the left files\
          \ to a JSON file\nwith open('left_files.json', 'w') as outfile:\n left_files_objects\
          \ = [{\"class\": \"File\", \"path\": file} for file in left_files]\n json.dump(left_files_objects,\
          \ outfile)\n\n# Write the right files to a JSON file\nwith open('right_files.json',\
          \ 'w') as outfile:\n right_files_objects = [{\"class\": \"File\", \"path\"\
          : file} for file in right_files]\n json.dump(right_files_objects, outfile)\n\
          \n"
inputs:
  inputFiles:
    type: File[]
    inputBinding:
      # Rendered as a single token: -i<file1>,<file2>,...
      prefix: -i
      separate: false
      itemSeparator: ","
outputs:
  leftFiles:
    type: File[]
    outputBinding:
      glob: left_files.json
      loadContents: true
      # The JSON file already holds an array of File objects.
      outputEval: $(JSON.parse(self[0].contents))
  rightFiles:
    type: File[]
    outputBinding:
      glob: right_files.json
      loadContents: true
      outputEval: $(JSON.parse(self[0].contents))