#!/usr/bin/env bash
# RNA-seq workflow: FastQC -> cutadapt -> HISAT2 -> samtools -> featureCounts.
# Run with bash from the project root; all paths below are relative to it.

# Abort on the first failing command so later steps never run on missing or
# stale inputs. nounset (-u) is deliberately left off.
# NOTE(review): some conda activation scripts are reported to read unset
# variables; confirm before enabling -u.
set -eo pipefail

# `conda activate` is a shell function, not an executable. It is only defined
# once conda's shell hook has been evaluated, which does not happen
# automatically in a non-interactive script.
eval "$(conda shell.bash hook)"

# Create and enter a dedicated environment for the pipeline tools.
conda create -n rnaseq_hisat2 python=3.10 -y
conda activate rnaseq_hisat2

# Bioconda packages depend on conda-forge; list it first so it takes priority.
conda install -c conda-forge -c bioconda fastqc cutadapt hisat2 samtools subread -y

# Working directories used by the steps below (-p: no error if they exist).
mkdir -p raw_data trimmed_data qc_reports reference hisat2_index alignments counts logs
# Download the paired-end reads into raw_data/.
#
# The URLs are taken from the environment, because a literal `<url>`
# placeholder is parsed by the shell as input/output redirection:
#   READS_R1_URL  URL of the forward (R1) FASTQ
#   READS_R2_URL  URL of the reverse (R2) FASTQ
# The two mates need distinct URLs; fetching the same URL twice would give
# identical R1 and R2 files.
: "${READS_R1_URL:?set READS_R1_URL to the URL of the forward (R1) FASTQ}"
: "${READS_R2_URL:?set READS_R2_URL to the URL of the reverse (R2) FASTQ}"

# Write straight to the target paths instead of cd-ing into raw_data, so the
# working directory stays at the project root for the steps that follow.
wget -O raw_data/sample_R1.fastq.gz "${READS_R1_URL}"
wget -O raw_data/sample_R2.fastq.gz "${READS_R2_URL}"
# Quality-check both raw read files; reports are written to qc_reports/.
fastqc --outdir qc_reports \
  raw_data/sample_R1.fastq.gz \
  raw_data/sample_R2.fastq.gz
# Trim the 3' adapter from both mates of each read pair.
# --adapter applies to R1 and -A to R2; the two outputs stay in sync as pairs.
# NOTE(review): AGATCGGAAGAGC is presumably the Illumina TruSeq adapter
# prefix; confirm against the library prep kit.
adapter_seq=AGATCGGAAGAGC
cutadapt \
  --adapter "${adapter_seq}" \
  -A "${adapter_seq}" \
  --output trimmed_data/sample_trimmed_R1.fastq.gz \
  --paired-output trimmed_data/sample_trimmed_R2.fastq.gz \
  raw_data/sample_R1.fastq.gz \
  raw_data/sample_R2.fastq.gz
# Download and decompress the reference genome and gene annotation into
# reference/.
#
# The URLs are taken from the environment, because a literal `<url>`
# placeholder is parsed by the shell as input/output redirection:
#   GENOME_FASTA_URL    URL of the gzipped genome FASTA
#   ANNOTATION_GTF_URL  URL of the gzipped GTF annotation
: "${GENOME_FASTA_URL:?set GENOME_FASTA_URL to the URL of the gzipped genome FASTA}"
: "${ANNOTATION_GTF_URL:?set ANNOTATION_GTF_URL to the URL of the gzipped GTF annotation}"

# Write straight to the target paths instead of cd-ing into reference/, so
# the project-relative paths used by the indexing, alignment and counting
# steps keep resolving.
wget -O reference/genome.fa.gz "${GENOME_FASTA_URL}"
wget -O reference/annotation.gtf.gz "${ANNOTATION_GTF_URL}"

# -f: overwrite an existing decompressed file, so a rerun does not stop on
# gunzip's "already exists" refusal.
gunzip -f reference/genome.fa.gz reference/annotation.gtf.gz
# Build the HISAT2 index from the reference genome, then align the trimmed
# read pairs against it. The alignment is written as SAM.
index_prefix=hisat2_index/genome_index

hisat2-build reference/genome.fa "${index_prefix}"

hisat2 -x "${index_prefix}" \
  -1 trimmed_data/sample_trimmed_R1.fastq.gz \
  -2 trimmed_data/sample_trimmed_R2.fastq.gz \
  -S alignments/sample.sam
# Post-process the alignment: SAM -> BAM, sort, index, and record
# flag-based summary statistics in logs/.
sam_file=alignments/sample.sam
unsorted_bam=alignments/sample.bam
sorted_bam=alignments/sample.sorted.bam

# Convert the text SAM to binary BAM.
samtools view -b -S "${sam_file}" > "${unsorted_bam}"

# Sort into a separate file, then index the sorted BAM.
samtools sort -o "${sorted_bam}" "${unsorted_bam}"
samtools index "${sorted_bam}"

# Keep the alignment summary alongside the other logs.
samtools flagstat "${sorted_bam}" > logs/sample.flagstat.txt
# Count fragments per gene from the sorted alignment using the GTF annotation.
#
# The library is paired-end (R1/R2), so featureCounts must be told so:
#   -p                declares the input as paired-end
#   --countReadPairs  counts each read pair (fragment) once, not each mate
# Without these, current Subread releases either reject paired-end input or
# count the two mates separately.
# NOTE(review): --countReadPairs requires Subread >= 2.0.2; older releases
# count fragments with -p alone.
# NOTE(review): strandedness (-s) is left at the default (unstranded);
# confirm against the library prep protocol.
featureCounts \
  -p --countReadPairs \
  -a reference/annotation.gtf \
  -o counts/gene_counts.txt \
  alignments/sample.sorted.bam