酵母同義和非同義的snp的程式

gaorongchao1990626發表於2013-09-02





use strict;
use warnings;

my $filename;
my @filename;
my $i;
#氨基酸hash
my(%genetic_code) = (     
         
    'TCA' => 'S',    # Serine     
    'TCC' => 'S',    # Serine     
    'TCG' => 'S',    # Serine     
    'TCT' => 'S',    # Serine     
    'TTC' => 'F',    # Phenylalanine     
    'TTT' => 'F',    # Phenylalanine     
    'TTA' => 'L',    # Leucine     
    'TTG' => 'L',    # Leucine     
    'TAC' => 'Y',    # Tyrosine      
    'TAT' => 'Y',    # Tyrosine     
    'TAA' => '_',    # Stop     
    'TAG' => '_',    # Stop     
    'TGC' => 'C',    # Cysteine     
    'TGT' => 'C',    # Cysteine     
    'TGA' => '_',    # Stop     
    'TGG' => 'W',    # Tryptophan     
    'CTA' => 'L',    # Leucine     
    'CTC' => 'L',    # Leucine     
    'CTG' => 'L',    # Leucine     
    'CTT' => 'L',    # Leucine     
    'CCA' => 'P',    # Proline     
    'CCC' => 'P',    # Proline     
    'CCG' => 'P',    # Proline     
    'CCT' => 'P',    # Proline     
    'CAC' => 'H',    # Histidine     
    'CAT' => 'H',    # Histidine     
    'CAA' => 'Q',    # Glutamine     
    'CAG' => 'Q',    # Glutamine     
    'CGA' => 'R',    # Arginine     
    'CGC' => 'R',    # Arginine     
    'CGG' => 'R',    # Arginine     
    'CGT' => 'R',    # Arginine     
    'ATA' => 'I',    # Isoleucine     
    'ATC' => 'I',    # Isoleucine     
    'ATT' => 'I',    # Isoleucine     
    'ATG' => 'M',    # Methionine     
    'ACA' => 'T',    # Threonine     
    'ACC' => 'T',    # Threonine     
    'ACG' => 'T',    # Threonine     
    'ACT' => 'T',    # Threonine     
    'AAC' => 'N',    # Asparagine     
    'AAT' => 'N',    # Asparagine     
    'AAA' => 'K',    # Lysine     
    'AAG' => 'K',    # Lysine     
    'AGC' => 'S',    # Serine     
    'AGT' => 'S',    # Serine     
    'AGA' => 'R',    # Arginine     
    'AGG' => 'R',    # Arginine     
    'GTA' => 'V',    # Valine     
    'GTC' => 'V',    # Valine     
    'GTG' => 'V',    # Valine     
    'GTT' => 'V',    # Valine     
    'GCA' => 'A',    # Alanine     
    'GCC' => 'A',    # Alanine     
    'GCG' => 'A',    # Alanine     
    'GCT' => 'A',    # Alanine         
    'GAC' => 'D',    # Aspartic Acid     
    'GAT' => 'D',    # Aspartic Acid     
    'GAA' => 'E',    # Glutamic Acid     
    'GAG' => 'E',    # Glutamic Acid     
    'GGA' => 'G',    # Glycine     
    'GGC' => 'G',    # Glycine     
    'GGG' => 'G',    # Glycine     
    'GGT' => 'G',    # Glycine     
    );     


my $jianji_number=0;
my %hash;
my $DNA;
my @DNA;
my $jianji;
my $first;
my $second;
my $base;
my @information;
my $key1;
my $key2;
my $key3;
my $rever_first;
my $rever_second;
my $yushu;
my $position;
#一下程式對每一個染色體做一個hash
@filename=qw/I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI/;
foreach $filename (@filename)
{
	$jianji_number=0;
	undef(@DNA);
	open(IN1,"chr$filename")||die("can not open");
	$i=<IN1>;
	@DNA=<IN1>;
	$DNA=join('',@DNA);
	$DNA=~s/\s//g;
	for($position=0;$position<length $DNA;++$position)#hash中後面一個數字就是鹼基的位置。本來少1個,但是我們人為的加上了1
	{
		$base=substr($DNA,$position,1);
		$hash{$filename}{$position+1}="$base";
	}
}


open(OUT,">TY_and_FTY.txt")||die("can not open");
open(IN,"information_of_tongyi_or_not.txt")||die("can not open");
while(<IN>)
{
	chomp;
	@information=split/\s+/,$_;
	if($information[8] eq "+")
	{
	    $key1=$hash{$information[0]}{$information[4]};
	    $key2=$hash{$information[0]}{$information[4]+1};
	    $key3=$hash{$information[0]}{$information[4]+2};
	    if($key1 eq "A" && $key2 eq "T" && $key3 eq "G")
	    {
		    if($information[7]==0)
		    {
			    $first=join('',$information[2],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2});
			    $second=join('',$information[3],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2});
			    if($genetic_code{$first} eq $genetic_code{$second})
			    {
				    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$first} TY0\n";
			    }
			    else
			    {
				    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";
			    }
		    }
		    elsif($information[7]==1)
		    {
			    $first=join('',$hash{$information[0]}{$information[1]-1},$information[2],$hash{$information[0]}{$information[1]+1});
			    $second=join('',$hash{$information[0]}{$information[1]-1},$information[3],$hash{$information[0]}{$information[1]+1});
			    if($genetic_code{$first} eq $genetic_code{$second})
			    {
				    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n";
			    }
			    else
			    {
				    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";
			    }
		    }
		    elsif($information[7]==2)
		    {
			    $first=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[2]);
			    $second=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[3]);
			    if($genetic_code{$first} eq $genetic_code{$second})
			    {
				    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n";
			    }
			    else
			    {
				    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";
			    }
		    }
	    }
	    else
	    {
		    next;
	    }
    }
	elsif($information[8] eq "-")
	{
	    $key1=$hash{$information[0]}{$information[5]};
	    $key2=$hash{$information[0]}{$information[5]-1};
	    $key3=$hash{$information[0]}{$information[5]-2};
	    if($key1 eq "T" && $key2 eq "A" && $key3 eq "C")#因為這裡是負鏈,所以對應的也應該是ATC的互補鏈,下面的翻譯過程也要注意,所有的要先對應到正鏈的位置上,然後在進行翻譯。
		{
		    print "T\n";
			$yushu=($information[5]+1-$information[1])%3;
			if($yushu==1)                #當反向的時候餘數為1,那麼就是氨基酸的第一位,餘數為2,那麼是氨基酸的第二位,餘數為o為氨基酸的第三位
			{
				$first=join('',$information[2],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2});
				$second=join('',$information[3],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2});
				$rever_first=revcom($first);
				$rever_second=revcom($second);
				if($genetic_code{$rever_first} eq $genetic_code{$rever_second})
				{
					print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";
				}
				else
				{
					print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";
				}
			}
			elsif($yushu==2)
			{
				$first=join('',$hash{$information[0]}{$information[1]+1},$information[2],$hash{$information[0]}{$information[1]-1});
				$second=join('',$hash{$information[0]}{$information[1]+1},$information[3],$hash{$information[0]}{$information[1]-1});
				$rever_first=revcom($first);
				$rever_second=revcom($second);
				if($genetic_code{$rever_first} eq $genetic_code{$rever_second})
				{
					print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";
				}
				else
				{
					print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";
				}

			}
			elsif($yushu==0)
			{
				$first=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[2]);
				$second=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[3]);
				$rever_first=revcom($first);
				$rever_second=revcom($second);
				if($genetic_code{$rever_first} eq $genetic_code{$rever_second})
				{
					print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";
				}
				else
				{
					print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";
				}
			}
		}
	}
	else 
	{
		next;
	}
}



#子程式:獲得互補序列
sub revcom  
{  
    # A subroutine to compute the reverse complement of DNA sequence   
    # 一個獲取DNA互補序列的子程式  
    my($dna)=@_;  
    print $dna."\n";  
    my ($revcom)=reverse($dna);  
    $revcom=~tr/ACGTacgt/TGCAtgca/;  
    return $revcom;  
}  


相關文章