use strict;
use warnings;
my $filename;
my @filename;
my $i;
#氨基酸hash
my(%genetic_code) = (
'TCA' => 'S', # Serine
'TCC' => 'S', # Serine
'TCG' => 'S', # Serine
'TCT' => 'S', # Serine
'TTC' => 'F', # Phenylalanine
'TTT' => 'F', # Phenylalanine
'TTA' => 'L', # Leucine
'TTG' => 'L', # Leucine
'TAC' => 'Y', # Tyrosine
'TAT' => 'Y', # Tyrosine
'TAA' => '_', # Stop
'TAG' => '_', # Stop
'TGC' => 'C', # Cysteine
'TGT' => 'C', # Cysteine
'TGA' => '_', # Stop
'TGG' => 'W', # Tryptophan
'CTA' => 'L', # Leucine
'CTC' => 'L', # Leucine
'CTG' => 'L', # Leucine
'CTT' => 'L', # Leucine
'CCA' => 'P', # Proline
'CCC' => 'P', # Proline
'CCG' => 'P', # Proline
'CCT' => 'P', # Proline
'CAC' => 'H', # Histidine
'CAT' => 'H', # Histidine
'CAA' => 'Q', # Glutamine
'CAG' => 'Q', # Glutamine
'CGA' => 'R', # Arginine
'CGC' => 'R', # Arginine
'CGG' => 'R', # Arginine
'CGT' => 'R', # Arginine
'ATA' => 'I', # Isoleucine
'ATC' => 'I', # Isoleucine
'ATT' => 'I', # Isoleucine
'ATG' => 'M', # Methionine
'ACA' => 'T', # Threonine
'ACC' => 'T', # Threonine
'ACG' => 'T', # Threonine
'ACT' => 'T', # Threonine
'AAC' => 'N', # Asparagine
'AAT' => 'N', # Asparagine
'AAA' => 'K', # Lysine
'AAG' => 'K', # Lysine
'AGC' => 'S', # Serine
'AGT' => 'S', # Serine
'AGA' => 'R', # Arginine
'AGG' => 'R', # Arginine
'GTA' => 'V', # Valine
'GTC' => 'V', # Valine
'GTG' => 'V', # Valine
'GTT' => 'V', # Valine
'GCA' => 'A', # Alanine
'GCC' => 'A', # Alanine
'GCG' => 'A', # Alanine
'GCT' => 'A', # Alanine
'GAC' => 'D', # Aspartic Acid
'GAT' => 'D', # Aspartic Acid
'GAA' => 'E', # Glutamic Acid
'GAG' => 'E', # Glutamic Acid
'GGA' => 'G', # Glycine
'GGC' => 'G', # Glycine
'GGG' => 'G', # Glycine
'GGT' => 'G', # Glycine
);
my $jianji_number=0;
my %hash;
my $DNA;
my @DNA;
my $jianji;
my $first;
my $second;
my $base;
my @information;
my $key1;
my $key2;
my $key3;
my $rever_first;
my $rever_second;
my $yushu;
my $position;
#一下程式對每一個染色體做一個hash
@filename=qw/I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI/;
foreach $filename (@filename)
{
$jianji_number=0;
undef(@DNA);
open(IN1,"chr$filename")||die("can not open");
$i=<IN1>;
@DNA=<IN1>;
$DNA=join('',@DNA);
$DNA=~s/\s//g;
for($position=0;$position<length $DNA;++$position)#hash中後面一個數字就是鹼基的位置。本來少1個,但是我們人為的加上了1
{
$base=substr($DNA,$position,1);
$hash{$filename}{$position+1}="$base";
}
}
open(OUT,">TY_and_FTY.txt")||die("can not open");
open(IN,"information_of_tongyi_or_not.txt")||die("can not open");
while(<IN>)
{
chomp;
@information=split/\s+/,$_;
if($information[8] eq "+")
{
$key1=$hash{$information[0]}{$information[4]};
$key2=$hash{$information[0]}{$information[4]+1};
$key3=$hash{$information[0]}{$information[4]+2};
if($key1 eq "A" && $key2 eq "T" && $key3 eq "G")
{
if($information[7]==0)
{
$first=join('',$information[2],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2});
$second=join('',$information[3],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2});
if($genetic_code{$first} eq $genetic_code{$second})
{
print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$first} TY0\n";
}
else
{
print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";
}
}
elsif($information[7]==1)
{
$first=join('',$hash{$information[0]}{$information[1]-1},$information[2],$hash{$information[0]}{$information[1]+1});
$second=join('',$hash{$information[0]}{$information[1]-1},$information[3],$hash{$information[0]}{$information[1]+1});
if($genetic_code{$first} eq $genetic_code{$second})
{
print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n";
}
else
{
print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";
}
}
elsif($information[7]==2)
{
$first=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[2]);
$second=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[3]);
if($genetic_code{$first} eq $genetic_code{$second})
{
print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n";
}
else
{
print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";
}
}
}
else
{
next;
}
}
elsif($information[8] eq "-")
{
$key1=$hash{$information[0]}{$information[5]};
$key2=$hash{$information[0]}{$information[5]-1};
$key3=$hash{$information[0]}{$information[5]-2};
if($key1 eq "T" && $key2 eq "A" && $key3 eq "C")#因為這裡是負鏈,所以對應的也應該是ATC的互補鏈,下面的翻譯過程也要注意,所有的要先對應到正鏈的位置上,然後在進行翻譯。
{
print "T\n";
$yushu=($information[5]+1-$information[1])%3;
if($yushu==1) #當反向的時候餘數為1,那麼就是氨基酸的第一位,餘數為2,那麼是氨基酸的第二位,餘數為o為氨基酸的第三位
{
$first=join('',$information[2],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2});
$second=join('',$information[3],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2});
$rever_first=revcom($first);
$rever_second=revcom($second);
if($genetic_code{$rever_first} eq $genetic_code{$rever_second})
{
print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";
}
else
{
print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";
}
}
elsif($yushu==2)
{
$first=join('',$hash{$information[0]}{$information[1]+1},$information[2],$hash{$information[0]}{$information[1]-1});
$second=join('',$hash{$information[0]}{$information[1]+1},$information[3],$hash{$information[0]}{$information[1]-1});
$rever_first=revcom($first);
$rever_second=revcom($second);
if($genetic_code{$rever_first} eq $genetic_code{$rever_second})
{
print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";
}
else
{
print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";
}
}
elsif($yushu==0)
{
$first=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[2]);
$second=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[3]);
$rever_first=revcom($first);
$rever_second=revcom($second);
if($genetic_code{$rever_first} eq $genetic_code{$rever_second})
{
print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";
}
else
{
print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";
}
}
}
}
else
{
next;
}
}
#子程式:獲得互補序列
sub revcom
{
# A subroutine to compute the reverse complement of DNA sequence
# 一個獲取DNA互補序列的子程式
my($dna)=@_;
print $dna."\n";
my ($revcom)=reverse($dna);
$revcom=~tr/ACGTacgt/TGCAtgca/;
return $revcom;
}