QIIME 2 Pipeline
Nucleic Acid Quantification
INSTALL MINICONDA
Download Distribution
https://docs.conda.io/en/latest/miniconda.html
Install Distribution
bash Miniconda3-latest-MacOSX-x86_64.sh
Follow the prompts on the installer screens.
INSTALL QIIME 2
Update Miniconda
conda update conda
Install wget
conda install wget
Install QIIME 2 within a conda environment
wget https://data.qiime2.org/distro/core/qiime2-2022.2-py38-osx-conda.yml
conda env create -n qiime2-2022.2 --file qiime2-2022.2-py38-osx-conda.yml
Activate the conda environment
conda activate qiime2-2022.2
Test your installation
qiime --help
QIIME ANALYSIS
Based on "Comprehensive end-to-end microbiome analysis using QIIME 2"
Create Metadata File
https://docs.qiime2.org/2022.2/tutorials/metadata/
| sample-id | subject | week |
|---|---|---|
| RD01 | ROLAND | 1 |
| JS01 | JULIAN | 1 |
Create the Manifest File
# Start manifest.tsv with the paired-end header row. The import step uses
# PairedEndFastqManifestPhred33V2, which needs a forward and a reverse path
# column rather than a single "absolute-filepath" column.
# printf is used because the handling of `echo -e` differs between shells.
printf 'sample-id\tforward-absolute-filepath\treverse-absolute-filepath\n' > manifest.tsv
Add FASTQ Files to Manifest
#!/usr/bin/env python3
from collections import defaultdict
import glob
import os
import send2trash
def multi_dict(K, type):
    """Return a defaultdict nested K levels deep.

    Missing keys at the first K-1 levels create another nested defaultdict;
    missing keys at the innermost level default to ``type()``.

    Args:
        K: number of nesting levels; must be at least 1.
        type: zero-argument factory for innermost values (e.g. ``int``).

    Raises:
        ValueError: if K is smaller than 1. Such a value never reaches the
            K == 1 base case, so every lookup would only create yet another
            empty level.
    """
    if K < 1:
        raise ValueError("K must be >= 1, got " + repr(K))
    if K == 1:
        return defaultdict(type)
    # Build the inner levels lazily, on first access of a missing key.
    return defaultdict(lambda: multi_dict(K - 1, type))
def delete_file(file_in):
    """Send ``file_in`` to the trash if it exists as a regular file.

    Paths that do not exist, and paths that are not regular files (for
    example directories), are left untouched.

    Args:
        file_in: path of the file to remove.
    """
    if os.path.isfile(file_in):
        # send2trash is used instead of os.remove; per its documentation the
        # file is moved to the system trash, so it stays recoverable.
        send2trash.send2trash(file_in)
# Build manifest.tsv: one row per sample holding the absolute paths of its
# forward (R1) and reverse (R2) FASTQ files.
import sys  # only needed for the diagnostics in this section

# "DATADIRECTORY" is a placeholder: replace it with the folder that contains
# the *.fastq.gz files.
dir_abs = os.path.abspath("DATADIRECTORY")

# dir_abs is absolute, so every glob hit is already an absolute path.
# Sorting makes the manifest reproducible, because glob returns files in
# arbitrary order.
file_paths_abs = sorted(glob.glob(os.path.join(dir_abs, "*.fastq.gz")))

# Manifest column for each read-direction token found in a file name.
columns = {
    "R1": "forward-absolute-filepath",
    "R2": "reverse-absolute-filepath",
}

# files[sample_ID][column] -> path. Each sample gets a plain dict, so asking
# for a missing mate cannot silently create a placeholder value.
files = defaultdict(dict)
for path in file_paths_abs:
    file_name_parts = os.path.basename(path).split("_")
    sample_ID = file_name_parts[0]
    # The read direction is expected in the third "_"-separated field.
    token = file_name_parts[2] if len(file_name_parts) > 2 else None
    direction = columns.get(token)
    if direction is None:
        # Skip files that do not follow the naming scheme instead of filing
        # them under whatever direction the previous file had.
        print("Skipping " + path + ": third '_' field is not R1 or R2",
              file=sys.stderr)
        continue
    files[sample_ID][direction] = path

lines = []
incomplete = []
for sample_ID in sorted(files):
    forward = files[sample_ID].get("forward-absolute-filepath")
    reverse = files[sample_ID].get("reverse-absolute-filepath")
    if forward is None or reverse is None:
        incomplete.append(sample_ID)
        continue
    lines.append([sample_ID, forward, reverse])

# A paired-end manifest needs both reads for every sample. Stop before the
# existing manifest is touched rather than writing a row with a bogus path.
if incomplete:
    sys.exit("Missing R1 or R2 file for sample(s): " + ", ".join(incomplete))

writer_name = "manifest.tsv"
# Send any previous manifest to the trash before writing the new one.
delete_file(writer_name)
header = "sample-id\tforward-absolute-filepath\treverse-absolute-filepath\n"
# The with-block closes the file even if a write fails.
with open(writer_name, "w") as writer:
    writer.write(header)
    for line in lines:
        writer.write("\t".join(line) + "\n")
Use the manifest file to import the sequences into QIIME 2
# Import the paired-end reads listed in manifest.tsv into a QIIME 2 artifact.
# NOTE(review): the "se-" prefix reads like "single-end", but the import type
# below is paired-end. Consider renaming, together with every later step that
# reads se-demux.qza.
qiime tools import \
--input-path manifest.tsv \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-format PairedEndFastqManifestPhred33V2 \
--output-path se-demux.qza
# Summarize the imported reads into a visualization.
qiime demux summarize \
--i-data se-demux.qza \
--o-visualization se-demux.qzv
# Open the summary visualization.
qiime tools view se-demux.qzv
# Filter the imported reads by quality score; also writes the filter stats.
qiime quality-filter q-score \
--i-demux se-demux.qza \
--o-filtered-sequences demux-filtered.qza \
--o-filter-stats demux-filter-stats.qza
# Denoise the filtered reads with Deblur (16S mode) at trim length 200,
# starting 4 jobs. Outputs the feature table, the representative sequences
# and the per-sample stats.
qiime deblur denoise-16S \
--i-demultiplexed-seqs demux-filtered.qza \
--p-trim-length 200 \
--p-sample-stats \
--p-jobs-to-start 4 \
--o-stats deblur-stats.qza \
--o-representative-sequences rep-seqs-deblur.qza \
--o-table table-deblur.qza
# Visualize the Deblur stats.
qiime deblur visualize-stats \
--i-deblur-stats deblur-stats.qza \
--o-visualization deblur-stats.qzv
# Tabulate the representative sequences.
qiime feature-table tabulate-seqs \
--i-data rep-seqs-deblur.qza \
--o-visualization rep-seqs-deblur.qzv
# Summarize the feature table, using metadata.tsv as the sample metadata.
qiime feature-table summarize \
--i-table table-deblur.qza \
--m-sample-metadata-file metadata.tsv \
--o-visualization table-deblur.qzv
# Download the SEPP reference database artifact. The file name suggests
# Greengenes 13_8, and the URL is under the 2019.10 path of data.qiime2.org.
wget -O "sepp-refs-gg-13-8.qza" \
"https://data.qiime2.org/2019.10/common/sepp-refs-gg-13-8.qza"
# Place the representative sequences into the reference tree with SEPP,
# using 4 threads. Outputs the insertion tree and the placements.
qiime fragment-insertion sepp \
--i-representative-sequences rep-seqs-deblur.qza \
--i-reference-database sepp-refs-gg-13-8.qza \
--p-threads 4 \
--o-tree insertion-tree.qza \
--o-placements insertion-placements.qza
# Filter the feature table against the insertion tree, writing the kept and
# the removed features to separate tables.
qiime fragment-insertion filter-features \
--i-table table-deblur.qza \
--i-tree insertion-tree.qza \
--o-filtered-table filtered-table-deblur.qza \
--o-removed-table removed-table.qza
# Prerequisites that are only set up further down in these notes:
#   - the empress plugin (installed with pip near the end of the notes)
#   - bespoke-taxonomy2.qza (written by the classify-sklearn step under
#     "For V2-V3 region sequences")
# Run those steps first, then come back to the two commands below.
# Plot the insertion tree with Empress, using the taxonomy as feature metadata.
qiime empress tree-plot \
--i-tree insertion-tree.qza \
--m-feature-metadata-file bespoke-taxonomy2.qza \
--o-visualization tree-viz.qzv
# Taxonomic bar plots of the tree-filtered feature table.
qiime taxa barplot \
--i-table filtered-table-deblur.qza \
--i-taxonomy bespoke-taxonomy2.qza \
--m-metadata-file metadata.tsv \
--o-visualization bar-plots.qzv
Classifying
For V3-V4 region sequences
# Download the human-stool class weights, the reference taxonomy and the
# reference reads from the readytowear repository (gg_13_8, 515f-806r).
wget \
-O "human-stool.qza" \
"https://github.com/BenKaehler/readytowear/raw/master/data/gg_13_8/515f-806r/human-stool.qza"
wget \
-O "ref-tax.qza" \
"https://github.com/BenKaehler/readytowear/raw/master/data/gg_13_8/515f-806r/ref-tax.qza"
wget \
-O "ref-seqs-v4.qza" \
"https://github.com/BenKaehler/readytowear/raw/master/data/gg_13_8/515f-806r/ref-seqs.qza"
# Train a naive Bayes classifier on the reference reads and taxonomy,
# weighted with human-stool.qza.
qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ref-seqs-v4.qza \
--i-reference-taxonomy ref-tax.qza \
--i-class-weight human-stool.qza \
--o-classifier gg138_v4_human-stool_classifier.qza
# Classify the Deblur representative sequences with the trained classifier.
qiime feature-classifier classify-sklearn \
--i-reads rep-seqs-deblur.qza \
--i-classifier gg138_v4_human-stool_classifier.qza \
--o-classification bespoke-taxonomy.qza
For V2-V3 region sequences
# OUR V2-V3 SEQUENCES FROM UB GENOME CORE DON'T FULLY OVERLAP WITH THE 515f-806r REGION, SO YOU CAN SKIP TRAINING AND USE THE FULL-LENGTH WEIGHTED PRE-TRAINED CLASSIFIER FROM https://zenodo.org/record/6395539. OTHERWISE YOUR CLASSIFICATION WILL BE LOW-RESOLUTION.
# Download the pre-trained full-length human-stool classifier.
# -O is required: without it wget names the local file after the URL,
# including the "?download=1" query string, and the classify step below
# cannot find its --i-classifier input.
wget \
-O "full-length-human-stool-classifier.qza" \
"https://zenodo.org/record/6395539/files/full-length-human-stool-classifier.qza?download=1"
# Classify the Deblur representative sequences with the pre-trained
# classifier. The result, bespoke-taxonomy2.qza, is the taxonomy used by the
# empress tree-plot and taxa barplot commands.
qiime feature-classifier classify-sklearn \
--i-reads rep-seqs-deblur.qza \
--i-classifier full-length-human-stool-classifier.qza \
--o-classification bespoke-taxonomy2.qza
# ALTERNATIVELY, DOWNLOAD THE FULL-LENGTH REF-SEQS, REF-TAX AND CLASS-WEIGHT FILES
# The files get a "full-length-" prefix so they do not overwrite the
# human-stool.qza and ref-tax.qza downloaded for the 515f-806r classifier.
wget \
-O "full-length-human-stool.qza" \
"https://github.com/BenKaehler/readytowear/raw/master/data/silva_138_1/full_length/human-stool.qza"
wget \
-O "full-length-ref-seqs.qza" \
"https://zenodo.org/record/6395539/files/ref-seqs.qza?download=1"
wget \
-O "full-length-ref-tax.qza" \
"https://zenodo.org/record/6395539/files/ref-tax.qza?download=1"
# AND THEN TRAIN THE CLASSIFIER
# The output has its own name: it is not the gg138 V4 classifier, and it must
# not replace the downloaded full-length-human-stool-classifier.qza either.
# To classify with it, pass this file as --i-classifier to classify-sklearn.
qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads full-length-ref-seqs.qza \
--i-reference-taxonomy full-length-ref-tax.qza \
--i-class-weight full-length-human-stool.qza \
--o-classifier full-length-human-stool-classifier-self-trained.qza
# Tabulate the taxonomy assignments together with the representative sequences.
# NOTE(review): this reads bespoke-taxonomy.qza, the output of the 515f-806r
# classifier. If only the V2-V3 branch was run, the taxonomy file is
# bespoke-taxonomy2.qza. Confirm which one is intended.
qiime metadata tabulate \
--m-input-file bespoke-taxonomy.qza \
--m-input-file rep-seqs-deblur.qza \
--o-visualization bespoke-taxonomy.qzv
# Summarize the tree-filtered feature table.
qiime feature-table summarize \
--i-table filtered-table-deblur.qza \
--m-sample-metadata-file metadata.tsv \
--o-visualization feature-table-summary.qzv
# Alpha rarefaction of all samples, up to a depth of 10000.
qiime diversity alpha-rarefaction \
--i-table filtered-table-deblur.qza \
--i-phylogeny insertion-tree.qza \
--p-max-depth 10000 \
--m-metadata-file metadata.tsv \
--o-visualization feature-table-alpha-rarefaction.qzv
# Keep only the samples whose "subject" metadata value is JULIAN.
qiime feature-table filter-samples \
--i-table filtered-table-deblur.qza \
--m-metadata-file metadata.tsv \
--p-where "[subject]='JULIAN'" \
--o-filtered-table child-table-j.qza
# Summarize the per-subject table.
qiime feature-table summarize \
--i-table child-table-j.qza \
--m-sample-metadata-file metadata.tsv \
--o-visualization child-table-j.qzv
# Alpha rarefaction of the per-subject table, up to a depth of 10000.
qiime diversity alpha-rarefaction \
--i-table child-table-j.qza \
--i-phylogeny insertion-tree.qza \
--p-max-depth 10000 \
--m-metadata-file metadata.tsv \
--o-visualization child-alpha-rarefaction-j.qzv
https://github.com/biocore/empress
# Install cython and numpy (>= 1.12.0) first, then empress.
# Run this with the QIIME 2 conda environment activated, so that pip installs
# into the environment that provides the qiime command.
pip install cython "numpy >= 1.12.0"
pip install empress
# Refresh the QIIME 2 cache (presumably so the new plugin is picked up).
qiime dev refresh-cache
# Check that the empress commands are now available.
qiime empress --help
# Plot the insertion tree on its own, without feature metadata.
# This writes tree-viz.qzv, the same output file as the earlier tree-plot call.
qiime empress tree-plot \
--i-tree insertion-tree.qza \
--o-visualization tree-viz.qzv