Chapter 31 Practical Perl program
31.1 Add annotation information to DESeq2 results
Imaging we have a table that stores that differentially expressed genes information.
It includes three columns (tab-delimited):
cat data/DEG_list.txt## #gene_id log2fc  p-val
## gene1    2   0.01
## gene2    3   0.04
## gene3    -2  0.06
## gene4    -8  0.001We have another table which has the annotation information of each gene:
cat data/gene_annotation.txt## gene_id  gene_shortname
## gene1    ROS
## gene2    WRKY
## gene3    ZmCCT
## gene4    WRKY1
## gene5    WRKY2
## gene6    wrky
## gene10   MCU31.2 Merge overlap genomic regions
#!/usr/bin/perl -w
use strict;
my ($DMR, $out) = @ARGV;
open DMR, $DMR or die "File not found: $!";
open OUT, "+>$out" or die "$!";
#read  a  line,
# go to next line
#     check overlap
#      if overlap: merge two regions. Then go to the next line.
#      if not overlap: print whatever we have now; then start from a line
while(my $region = <DMR>){
    chomp $region;
    my ($chrom1, $start1, $end1) = split(/\t/, $region);
    BLOCK:{
        my $new_region = <DMR>;
        if(!$new_region){
            print OUT "$chrom1\t$start1\t$end1\n";
        }else{
            chomp $new_region;
            my ($chrom2, $start2, $end2) = split(/\t/, $new_region);
             ## start1===============end1
             ##           start2=====================end2
             ## start1===============================new_end1
            if($chrom2 eq $chrom1 && $start2 >= $start1 && $start2 <=$end1){
                $end1 = $end2;
                redo BLOCK;
            }else{
                print OUT "$chrom1\t$start1\t$end1\n";
                ($chrom1, $start1, $end1) = ($chrom2, $start2, $end2);
                redo BLOCK;
            }
        }
    }
}cat data/DMR_region.txt## chr1 100 200
## chr1 150 250
## chr1 200 300
## chr1 500 600
## chr1 550 650
## chr2 300 800
## chr2 400 1000
## chr3 500 1000perl code_perl/genomic_coordinate_merge.pl  data/DMR_region.txt  data/DMR_region_merged.txt 
cat data/DMR_region_merged.txt ## chr1 100 300
## chr1 500 650
## chr2 300 1000
## chr3 500 1000