Download indexes and annotations
RNAEditor requires a set of annotation files and databases to detect editing sites. Either download one of the following annotation bundles or download the files manually by executing the commands below.
Human
Mouse
Unix commands to download the GRCh38 files manually
# Reference genome: stream the gzipped Ensembl release-83 GRCh38 primary
# assembly from the FTP server and write it out decompressed.
wget -qO- ftp://ftp.ensembl.org/pub/release-83/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz \
  | gunzip -c \
  > Homo_sapiens.GRCh38.dna.primary_assembly.fa

# Index the reference FASTA with bwa and with samtools.
bwa index Homo_sapiens.GRCh38.dna.primary_assembly.fa
samtools faidx Homo_sapiens.GRCh38.dna.primary_assembly.fa

# Create the sequence dictionary (.dict) for the reference with Picard.
java -jar /usr/local/bin/picard-tools/CreateSequenceDictionary.jar \
  REFERENCE=Homo_sapiens.GRCh38.dna.primary_assembly.fa \
  OUTPUT=Homo_sapiens.GRCh38.dna.primary_assembly.dict

# Gene annotation: stream the gzipped release-83 GTF and write it out decompressed.
wget -qO- ftp://ftp.ensembl.org/pub/release-83/gtf/homo_sapiens/Homo_sapiens.GRCh38.83.gtf.gz \
  | gunzip -c \
  > Homo_sapiens.GRCh38.83.gtf
# dbSNP: stream the gzipped Ensembl release-83 variation VCF, normalise the
# fifth tab-separated column (ALT in a VCF) with awk, and save it as dbSNP.vcf.
# The awk program:
#   - replaces every "." in column 5 with "N";
#   - sets an empty column 5 to "N" on lines whose first column does not start with "#";
#   - hard-codes column 5 for rs193922900 ("TN") and rs59736472 ("AN");
#   - replaces every "H" in column 5 with "N";
#   - prints every line.
# The ">" before dbSNP.vcf is required: without it awk treats dbSNP.vcf as an
# input file, ignores the piped download, and never creates the output.
wget -qO- ftp://ftp.ensembl.org/pub/release-83/variation/vcf/homo_sapiens/Homo_sapiens.vcf.gz \
  | gunzip -c \
  | awk 'BEGIN{FS="\t";OFS="\t"};match($5,/\./){gsub(/\./,"N",$5)};$5 == "" && $1 !~ /^#/ {gsub("","N",$5)};$3 ~ /rs193922900/ {$5="TN"};$3 ~ /rs59736472/ {$5="AN"};$5 ~ /H/ {gsub(/H/,"N",$5)};{print $0}' \
  > dbSNP.vcf
# ESP and HAPMAP variants: stream the matching gzipped VCFs, decompress them,
# drop lines starting with "##" and drop every line containing "rs" followed
# by two digits.
# The URLs and grep patterns are quoted so the shell never glob-expands
# "*" or "[0-9]" against files in the current directory; the wildcard in the
# URL reaches wget unchanged.
wget -qO- 'ftp://ftp.ensembl.org/pub/release-83/variation/vcf/homo_sapiens/ESP65*.vcf.gz' \
  | gunzip -c \
  | grep -v '^##' \
  | grep -v 'rs[0-9][0-9]' \
  > ESP.vcf
wget -qO- 'ftp://ftp.ensembl.org/pub/release-83/variation/vcf/homo_sapiens/*HAPMAP*.vcf.gz' \
  | gunzip -c \
  | grep -v '^##' \
  | grep -v 'rs[0-9][0-9]' \
  > HAPMAP.vcf
- # Download the Alu regions from the RepeatMasker track using the settings below
- Link: http://genome.ucsc.edu/cgi-bin/hgTables
- group: Variation and Repeats
- track: RepeatMasker
- table: rmsk
- output format: BED
# Run this awk command to make the Alu regions compatible with the Ensembl annotation
# sub(/^chr/, "", $1) removes "chr" only when it starts the first column, so
# names that merely contain "chr" elsewhere are left intact; every line is
# printed, tab-separated. Replace yourFile.bed with the downloaded BED file.
awk 'BEGIN{FS="\t";OFS="\t"} {sub(/^chr/,"",$1); print $0}' yourFile.bed > yourFile_noCHR.bed