Hello all,
I have a problem in my script. Can anybody here solve it, or tell me something about it?
I appreciate any help.
Regards
#!/usr/bin/perl
# Translate a DNA sequence in all six reading frames
use strict;
use warnings;
# Initialize variables
my @file_data = ( );
my $dna = '';
my $revcom = '';
my $protein = '';
my $codon = '';
# Prompt, then read the FASTA-formatted sequence data from standard input.
# Every line up to end-of-file is consumed.
print "name of the DNA sequence: ";
# Assign to the array declared above. Writing "my @file_data" a second time
# would redeclare it and raise a '"my" variable masks earlier declaration'
# warning.
@file_data = <STDIN>;
chomp (@file_data);
# Extract the sequence data from the lines that were read.
# The variable is $dna (lower case): Perl identifiers are case-sensitive and
# $DNA was never declared, which is a compile-time error under "use strict".
$dna = extract_sequence_from_fasta_data(@file_data);
####################################################################################
##################################subroutine###########################################
# codon2aa
#
# Translate a DNA codon into its one-letter amino acid code.
# Version 1: one case-insensitive pattern per codon, tried in a fixed order.
#
# Argument: $codon - the codon string to translate.
# Returns:  the one-letter amino acid code, or '_' for a stop codon.
# If nothing matches, a "Bad codon" message goes to STDERR and the program
# exits.
#
# The patterns are not anchored, so the first listed codon that occurs
# anywhere inside $codon decides the result.
#
# NOTE(review): this sub is NOT commented out. Other definitions of codon2aa
# follow in the same file and Perl keeps the last one compiled, so confirm
# which version is meant to be active.
sub codon2aa {
    my ($codon) = @_;

    # Flat list of (codon, amino acid) pairs; the order is significant.
    my @pairs = qw(
        TCA S  TCC S  TCG S  TCT S
        TTC F  TTT F  TTA L  TTG L
        TAC Y  TAT Y  TAA _  TAG _
        TGC C  TGT C  TGA _  TGG W
        CTA L  CTC L  CTG L  CTT L
        CCA P  CCC P  CCG P  CCT P
        CAC H  CAT H  CAA Q  CAG Q
        CGA R  CGC R  CGG R  CGT R
        ATA I  ATC I  ATT I  ATG M
        ACA T  ACC T  ACG T  ACT T
        AAC N  AAT N  AAA K  AAG K
        AGC S  AGT S  AGA R  AGG R
        GTA V  GTC V  GTG V  GTT V
        GCA A  GCC A  GCG A  GCT A
        GAC D  GAT D  GAA E  GAG E
        GGA G  GGC G  GGG G  GGT G
    );

    # Walk the pairs two elements at a time and stop at the first match.
    for (my $idx = 0; $idx < @pairs; $idx += 2) {
        my ($triplet, $amino_acid) = @pairs[$idx, $idx + 1];
        return $amino_acid if $codon =~ /$triplet/i;
    }

    print STDERR "Bad codon \"$codon\"!!\n";
    exit;
}
# codon2aa
#
# Translate a DNA codon into its one-letter amino acid code.
# Version 2: one case-insensitive pattern per amino acid, using character
# classes and "." for the positions where several bases give the same result.
#
# Argument: $codon - the codon string to translate.
# Returns:  the one-letter amino acid code, or '_' for a stop codon.
# If nothing matches, a "Bad codon" message goes to STDERR and the program
# exits.
#
# The patterns are not anchored and are tried in the order listed, so the
# first rule that matches anywhere inside $codon decides the result.
#
# NOTE(review): this sub is NOT commented out. Other definitions of codon2aa
# exist in the same file and Perl keeps the last one compiled, so confirm
# which version is meant to be active.
sub codon2aa {
    my ($codon) = @_;

    # Ordered [pattern, amino acid] rules; the order is significant.
    my @rules = (
        [ qr/GC./i          => 'A' ],    # Alanine
        [ qr/TG[TC]/i       => 'C' ],    # Cysteine
        [ qr/GA[TC]/i       => 'D' ],    # Aspartic Acid
        [ qr/GA[AG]/i       => 'E' ],    # Glutamic Acid
        [ qr/TT[TC]/i       => 'F' ],    # Phenylalanine
        [ qr/GG./i          => 'G' ],    # Glycine
        [ qr/CA[TC]/i       => 'H' ],    # Histidine
        [ qr/AT[TCA]/i      => 'I' ],    # Isoleucine
        [ qr/AA[AG]/i       => 'K' ],    # Lysine
        [ qr/TT[AG]|CT./i   => 'L' ],    # Leucine
        [ qr/ATG/i          => 'M' ],    # Methionine
        [ qr/AA[TC]/i       => 'N' ],    # Asparagine
        [ qr/CC./i          => 'P' ],    # Proline
        [ qr/CA[AG]/i       => 'Q' ],    # Glutamine
        [ qr/CG.|AG[AG]/i   => 'R' ],    # Arginine
        [ qr/TC.|AG[TC]/i   => 'S' ],    # Serine
        [ qr/AC./i          => 'T' ],    # Threonine
        [ qr/GT./i          => 'V' ],    # Valine
        [ qr/TGG/i          => 'W' ],    # Tryptophan
        [ qr/TA[TC]/i       => 'Y' ],    # Tyrosine
        [ qr/TA[AG]|TGA/i   => '_' ],    # Stop
    );

    foreach my $rule (@rules) {
        my ($pattern, $amino_acid) = @{$rule};
        return $amino_acid if $codon =~ $pattern;
    }

    print STDERR "Bad codon \"$codon\"!!\n";
    exit;
}
# From Chapter 8
#
# codon2aa
#
# A subroutine to translate a DNA 3-character codon to an amino acid
# Version 3, using hash lookup
#
# Argument: $codon - the codon string; it is upper-cased before the lookup,
#                    so input case does not matter.
# Returns:  the one-letter amino acid code, or '_' for a stop codon.
# If the codon is not a key of the table, a "Bad codon" message goes to
# STDERR and the program exits.
#
# The lookup table is built once, inside a BEGIN block, and shared with the
# sub through a closure. Building it inside the sub would recreate all 64
# entries on every call. BEGIN guarantees the table is filled in before any
# run-time code can call codon2aa.
BEGIN {
    my %genetic_code = (
        'TCA' => 'S', 'TCC' => 'S', 'TCG' => 'S', 'TCT' => 'S',    # Serine
        'TTC' => 'F', 'TTT' => 'F',                                # Phenylalanine
        'TTA' => 'L', 'TTG' => 'L',                                # Leucine
        'TAC' => 'Y', 'TAT' => 'Y',                                # Tyrosine
        'TAA' => '_', 'TAG' => '_',                                # Stop
        'TGC' => 'C', 'TGT' => 'C',                                # Cysteine
        'TGA' => '_',                                              # Stop
        'TGG' => 'W',                                              # Tryptophan
        'CTA' => 'L', 'CTC' => 'L', 'CTG' => 'L', 'CTT' => 'L',    # Leucine
        'CCA' => 'P', 'CCC' => 'P', 'CCG' => 'P', 'CCT' => 'P',    # Proline
        'CAC' => 'H', 'CAT' => 'H',                                # Histidine
        'CAA' => 'Q', 'CAG' => 'Q',                                # Glutamine
        'CGA' => 'R', 'CGC' => 'R', 'CGG' => 'R', 'CGT' => 'R',    # Arginine
        'ATA' => 'I', 'ATC' => 'I', 'ATT' => 'I',                  # Isoleucine
        'ATG' => 'M',                                              # Methionine
        'ACA' => 'T', 'ACC' => 'T', 'ACG' => 'T', 'ACT' => 'T',    # Threonine
        'AAC' => 'N', 'AAT' => 'N',                                # Asparagine
        'AAA' => 'K', 'AAG' => 'K',                                # Lysine
        'AGC' => 'S', 'AGT' => 'S',                                # Serine
        'AGA' => 'R', 'AGG' => 'R',                                # Arginine
        'GTA' => 'V', 'GTC' => 'V', 'GTG' => 'V', 'GTT' => 'V',    # Valine
        'GCA' => 'A', 'GCC' => 'A', 'GCG' => 'A', 'GCT' => 'A',    # Alanine
        'GAC' => 'D', 'GAT' => 'D',                                # Aspartic Acid
        'GAA' => 'E', 'GAG' => 'E',                                # Glutamic Acid
        'GGA' => 'G', 'GGC' => 'G', 'GGG' => 'G', 'GGT' => 'G',    # Glycine
    );

    sub codon2aa {
        my($codon) = @_;
        $codon = uc $codon;
        if (exists $genetic_code{$codon}) {
            return $genetic_code{$codon};
        }
        print STDERR "Bad codon \"$codon\"!!\n";
        exit;
    }
}
# dna2peptide
#
# Translate a DNA sequence into a peptide string.
#
# Argument: $dna - the DNA sequence.
# Returns:  the concatenation of codon2aa() results for each consecutive
#           3-character codon, starting at the first character. A trailing
#           piece shorter than three characters is not translated.
sub dna2peptide {
    my($dna) = @_;
    use strict;
    use warnings;

    my $peptide = '';
    # Highest offset at which a complete codon still fits.
    my $last_start = length($dna) - 3;
    my $offset = 0;
    while ($offset <= $last_start) {
        $peptide .= codon2aa( substr($dna, $offset, 3) );
        $offset += 3;
    }
    return $peptide;
}
# extractSEQRES
#
# Given a scalar holding newline-separated SEQRES lines, return a list with
# one string of residues per chain.
#
# Argument: $seqres - the SEQRES lines as one string.
# Returns:  a list of residue strings, in the order the chains appear; the
#           string collected last is always appended at the end.
sub extractSEQRES {
    use strict;
    use warnings;
    my($seqres) = @_;

    my @chains = ( );
    my $previous_chain = '';
    my $collected = '';

    for my $row ( split( /\n/, $seqres ) ) {
        # Chain identifier is in column 12, residues start in column 20.
        my $chain_id = substr($row, 11, 1);
        my $residues = substr($row, 19, 52);

        if ($previous_chain eq '') {
            # Nothing seen yet: start collecting.
            $collected = $residues;
        }
        elsif ($chain_id eq $previous_chain) {
            # Same chain continues on this line.
            $collected .= $residues;
        }
        elsif ($collected) {
            # Chain changed: store the finished chain, start the next one.
            push @chains, $collected;
            $collected = $residues;
        }
        $previous_chain = $chain_id;
    }

    # Store the chain that was still being collected.
    push @chains, $collected;
    return @chains;
}
# extract_sequence_from_fasta_data
#
# Pull the sequence out of an array of FASTA file lines.
#
# Argument: @fasta_file_data - the lines of the file.
# Returns:  one string made of every line that is not blank, not a comment
#           (first non-blank character "#") and not a FASTA header (starts
#           with ">"), with all whitespace removed.
sub extract_sequence_from_fasta_data {
    my(@fasta_file_data) = @_;
    use strict;
    use warnings;

    # Keep only the sequence lines and glue them together.
    my $sequence = join '', grep {
           $_ !~ /^\s*$/      # blank line
        && $_ !~ /^\s*#/      # comment line
        && $_ !~ /^>/         # fasta header line
    } @fasta_file_data;

    # Strip the remaining whitespace (line ends, spaces inside lines).
    $sequence =~ s/\s//g;
    return $sequence;
}
# A Subroutine to Read FASTA Files
# get_file_data
#
# A subroutine to get data from a file given its filename
#
# Argument: $filename - path of the file to read.
# Returns:  the lines of the file, line endings included.
# If the file cannot be opened, a "Cannot open file" message goes to STDERR
# and the program exits.
sub get_file_data {
    my($filename) = @_;
    use strict;
    use warnings;

    # Three-argument open with an explicit read mode: the filename is taken
    # literally, so characters such as ">" or "|" in it cannot turn the call
    # into a write or a command pipe. As a consequence, surrounding whitespace
    # is no longer stripped, so chomp a filename read from input first.
    # The lexical handle replaces a global bareword handle.
    my $fh;
    unless( open($fh, '<', $filename) ) {
        print STDERR "Cannot open file \"$filename\"\n\n";
        exit;
    }
    my @filedata = <$fh>;
    close $fh;
    return @filedata;
}
# print_sequence
#
# Print a sequence to the currently selected output handle, broken into
# lines of a fixed width.
#
# Arguments: $sequence - the string to print.
#            $length   - number of characters per output line.
# Each piece is followed by a newline; the final line may be shorter.
sub print_sequence {
    my($sequence, $length) = @_;
    use strict;
    use warnings;

    my $total = length($sequence);
    my $offset = 0;
    while ($offset < $total) {
        print substr($sequence, $offset, $length), "\n";
        $offset += $length;
    }
}
# translate_frame
#
# Translate one reading frame of a DNA sequence.
#
# Arguments: $seq   - the DNA sequence.
#            $start - 1-based position of the first base to translate.
#            $end   - 1-based position of the last base to translate; when
#                     it is omitted (or otherwise false) the end of $seq is
#                     used.
# Returns:  the result of dna2peptide() on that stretch of the sequence.
sub translate_frame {
    my($seq, $start, $end) = @_;

    # Default the end point to the last base of the sequence.
    $end = length($seq) unless $end;

    # Convert the 1-based inclusive range into a substr offset and length.
    my $offset = $start - 1;
    my $span   = $end - $start + 1;
    return dna2peptide( substr($seq, $offset, $span) );
}
1;
##############################################################################
#######################eind SUB#################################################
# Translate the DNA to protein in six reading frames
# and print the protein in lines 70 characters long

# Frames 1-3: the sequence as given, starting at base 1, 2 and 3.
for my $frame (1 .. 3) {
    print "\n -------Reading Frame $frame--------\n\n";
    $protein = translate_frame($dna, $frame);
    print_sequence($protein, 70);
}

# Calculate reverse complement.
# No revcom() subroutine is defined in this script, so calling one fails at
# run time with "Undefined subroutine". Compute it here instead: reverse the
# string, then swap each base for its complement (both letter cases).
$revcom = reverse $dna;
$revcom =~ tr/ACGTacgt/TGCAtgca/;

# Frames 4-6: the reverse complement, starting at base 1, 2 and 3.
for my $frame (1 .. 3) {
    my $label = $frame + 3;
    print "\n -------Reading Frame $label--------\n\n";
    $protein = translate_frame($revcom, $frame);
    print_sequence($protein, 70);
}
exit;
I have a problem in my script. Can anybody here solve it, or tell me something about it?
I appreciate any help.
Regards
#!/usr/bin/perl
# Translate a DNA sequence in all six reading frames
use strict;
use warnings;
# Initialize variables
my @file_data = ( );
my $dna = '';
my $revcom = '';
my $protein = '';
my $codon = '';
# Prompt, then read the FASTA-formatted sequence data from standard input.
# Every line up to end-of-file is consumed.
print "name of the DNA sequence: ";
# Assign to the array declared above. Writing "my @file_data" a second time
# would redeclare it and raise a '"my" variable masks earlier declaration'
# warning.
@file_data = <STDIN>;
chomp (@file_data);
# Extract the sequence data from the lines that were read.
# The variable is $dna (lower case): Perl identifiers are case-sensitive and
# $DNA was never declared, which is a compile-time error under "use strict".
$dna = extract_sequence_from_fasta_data(@file_data);
####################################################################################
##################################subroutine###########################################
# codon2aa
#
# Translate a DNA codon into its one-letter amino acid code.
# Version 1: one case-insensitive pattern per codon, tried in a fixed order.
#
# Argument: $codon - the codon string to translate.
# Returns:  the one-letter amino acid code, or '_' for a stop codon.
# If nothing matches, a "Bad codon" message goes to STDERR and the program
# exits.
#
# The patterns are not anchored, so the first listed codon that occurs
# anywhere inside $codon decides the result.
#
# NOTE(review): this sub is NOT commented out. Other definitions of codon2aa
# exist in the same file and Perl keeps the last one compiled, so confirm
# which version is meant to be active.
sub codon2aa {
    my ($codon) = @_;

    # Flat list of (codon, amino acid) pairs; the order is significant.
    my @pairs = qw(
        TCA S  TCC S  TCG S  TCT S
        TTC F  TTT F  TTA L  TTG L
        TAC Y  TAT Y  TAA _  TAG _
        TGC C  TGT C  TGA _  TGG W
        CTA L  CTC L  CTG L  CTT L
        CCA P  CCC P  CCG P  CCT P
        CAC H  CAT H  CAA Q  CAG Q
        CGA R  CGC R  CGG R  CGT R
        ATA I  ATC I  ATT I  ATG M
        ACA T  ACC T  ACG T  ACT T
        AAC N  AAT N  AAA K  AAG K
        AGC S  AGT S  AGA R  AGG R
        GTA V  GTC V  GTG V  GTT V
        GCA A  GCC A  GCG A  GCT A
        GAC D  GAT D  GAA E  GAG E
        GGA G  GGC G  GGG G  GGT G
    );

    # Walk the pairs two elements at a time and stop at the first match.
    for (my $idx = 0; $idx < @pairs; $idx += 2) {
        my ($triplet, $amino_acid) = @pairs[$idx, $idx + 1];
        return $amino_acid if $codon =~ /$triplet/i;
    }

    print STDERR "Bad codon \"$codon\"!!\n";
    exit;
}
# codon2aa
#
# Translate a DNA codon into its one-letter amino acid code.
# Version 2: one case-insensitive pattern per amino acid, using character
# classes and "." for the positions where several bases give the same result.
#
# Argument: $codon - the codon string to translate.
# Returns:  the one-letter amino acid code, or '_' for a stop codon.
# If nothing matches, a "Bad codon" message goes to STDERR and the program
# exits.
#
# The patterns are not anchored and are tried in the order listed, so the
# first rule that matches anywhere inside $codon decides the result.
#
# NOTE(review): this sub is NOT commented out. Other definitions of codon2aa
# exist in the same file and Perl keeps the last one compiled, so confirm
# which version is meant to be active.
sub codon2aa {
    my ($codon) = @_;

    # Ordered [pattern, amino acid] rules; the order is significant.
    my @rules = (
        [ qr/GC./i          => 'A' ],    # Alanine
        [ qr/TG[TC]/i       => 'C' ],    # Cysteine
        [ qr/GA[TC]/i       => 'D' ],    # Aspartic Acid
        [ qr/GA[AG]/i       => 'E' ],    # Glutamic Acid
        [ qr/TT[TC]/i       => 'F' ],    # Phenylalanine
        [ qr/GG./i          => 'G' ],    # Glycine
        [ qr/CA[TC]/i       => 'H' ],    # Histidine
        [ qr/AT[TCA]/i      => 'I' ],    # Isoleucine
        [ qr/AA[AG]/i       => 'K' ],    # Lysine
        [ qr/TT[AG]|CT./i   => 'L' ],    # Leucine
        [ qr/ATG/i          => 'M' ],    # Methionine
        [ qr/AA[TC]/i       => 'N' ],    # Asparagine
        [ qr/CC./i          => 'P' ],    # Proline
        [ qr/CA[AG]/i       => 'Q' ],    # Glutamine
        [ qr/CG.|AG[AG]/i   => 'R' ],    # Arginine
        [ qr/TC.|AG[TC]/i   => 'S' ],    # Serine
        [ qr/AC./i          => 'T' ],    # Threonine
        [ qr/GT./i          => 'V' ],    # Valine
        [ qr/TGG/i          => 'W' ],    # Tryptophan
        [ qr/TA[TC]/i       => 'Y' ],    # Tyrosine
        [ qr/TA[AG]|TGA/i   => '_' ],    # Stop
    );

    foreach my $rule (@rules) {
        my ($pattern, $amino_acid) = @{$rule};
        return $amino_acid if $codon =~ $pattern;
    }

    print STDERR "Bad codon \"$codon\"!!\n";
    exit;
}
# From Chapter 8
#
# codon2aa
#
# A subroutine to translate a DNA 3-character codon to an amino acid
# Version 3, using hash lookup
#
# Argument: $codon - the codon string; it is upper-cased before the lookup,
#                    so input case does not matter.
# Returns:  the one-letter amino acid code, or '_' for a stop codon.
# If the codon is not a key of the table, a "Bad codon" message goes to
# STDERR and the program exits.
#
# The lookup table is built once, inside a BEGIN block, and shared with the
# sub through a closure. Building it inside the sub would recreate all 64
# entries on every call. BEGIN guarantees the table is filled in before any
# run-time code can call codon2aa.
BEGIN {
    my %genetic_code = (
        'TCA' => 'S', 'TCC' => 'S', 'TCG' => 'S', 'TCT' => 'S',    # Serine
        'TTC' => 'F', 'TTT' => 'F',                                # Phenylalanine
        'TTA' => 'L', 'TTG' => 'L',                                # Leucine
        'TAC' => 'Y', 'TAT' => 'Y',                                # Tyrosine
        'TAA' => '_', 'TAG' => '_',                                # Stop
        'TGC' => 'C', 'TGT' => 'C',                                # Cysteine
        'TGA' => '_',                                              # Stop
        'TGG' => 'W',                                              # Tryptophan
        'CTA' => 'L', 'CTC' => 'L', 'CTG' => 'L', 'CTT' => 'L',    # Leucine
        'CCA' => 'P', 'CCC' => 'P', 'CCG' => 'P', 'CCT' => 'P',    # Proline
        'CAC' => 'H', 'CAT' => 'H',                                # Histidine
        'CAA' => 'Q', 'CAG' => 'Q',                                # Glutamine
        'CGA' => 'R', 'CGC' => 'R', 'CGG' => 'R', 'CGT' => 'R',    # Arginine
        'ATA' => 'I', 'ATC' => 'I', 'ATT' => 'I',                  # Isoleucine
        'ATG' => 'M',                                              # Methionine
        'ACA' => 'T', 'ACC' => 'T', 'ACG' => 'T', 'ACT' => 'T',    # Threonine
        'AAC' => 'N', 'AAT' => 'N',                                # Asparagine
        'AAA' => 'K', 'AAG' => 'K',                                # Lysine
        'AGC' => 'S', 'AGT' => 'S',                                # Serine
        'AGA' => 'R', 'AGG' => 'R',                                # Arginine
        'GTA' => 'V', 'GTC' => 'V', 'GTG' => 'V', 'GTT' => 'V',    # Valine
        'GCA' => 'A', 'GCC' => 'A', 'GCG' => 'A', 'GCT' => 'A',    # Alanine
        'GAC' => 'D', 'GAT' => 'D',                                # Aspartic Acid
        'GAA' => 'E', 'GAG' => 'E',                                # Glutamic Acid
        'GGA' => 'G', 'GGC' => 'G', 'GGG' => 'G', 'GGT' => 'G',    # Glycine
    );

    sub codon2aa {
        my($codon) = @_;
        $codon = uc $codon;
        if (exists $genetic_code{$codon}) {
            return $genetic_code{$codon};
        }
        print STDERR "Bad codon \"$codon\"!!\n";
        exit;
    }
}
# dna2peptide
#
# Translate a DNA sequence into a peptide string.
#
# Argument: $dna - the DNA sequence.
# Returns:  the concatenation of codon2aa() results for each consecutive
#           3-character codon, starting at the first character. A trailing
#           piece shorter than three characters is not translated.
sub dna2peptide {
    my($dna) = @_;
    use strict;
    use warnings;

    my $peptide = '';
    # Highest offset at which a complete codon still fits.
    my $last_start = length($dna) - 3;
    my $offset = 0;
    while ($offset <= $last_start) {
        $peptide .= codon2aa( substr($dna, $offset, 3) );
        $offset += 3;
    }
    return $peptide;
}
# extractSEQRES
#
# Given a scalar holding newline-separated SEQRES lines, return a list with
# one string of residues per chain.
#
# Argument: $seqres - the SEQRES lines as one string.
# Returns:  a list of residue strings, in the order the chains appear; the
#           string collected last is always appended at the end.
sub extractSEQRES {
    use strict;
    use warnings;
    my($seqres) = @_;

    my @chains = ( );
    my $previous_chain = '';
    my $collected = '';

    for my $row ( split( /\n/, $seqres ) ) {
        # Chain identifier is in column 12, residues start in column 20.
        my $chain_id = substr($row, 11, 1);
        my $residues = substr($row, 19, 52);

        if ($previous_chain eq '') {
            # Nothing seen yet: start collecting.
            $collected = $residues;
        }
        elsif ($chain_id eq $previous_chain) {
            # Same chain continues on this line.
            $collected .= $residues;
        }
        elsif ($collected) {
            # Chain changed: store the finished chain, start the next one.
            push @chains, $collected;
            $collected = $residues;
        }
        $previous_chain = $chain_id;
    }

    # Store the chain that was still being collected.
    push @chains, $collected;
    return @chains;
}
# extract_sequence_from_fasta_data
#
# Pull the sequence out of an array of FASTA file lines.
#
# Argument: @fasta_file_data - the lines of the file.
# Returns:  one string made of every line that is not blank, not a comment
#           (first non-blank character "#") and not a FASTA header (starts
#           with ">"), with all whitespace removed.
sub extract_sequence_from_fasta_data {
    my(@fasta_file_data) = @_;
    use strict;
    use warnings;

    # Keep only the sequence lines and glue them together.
    my $sequence = join '', grep {
           $_ !~ /^\s*$/      # blank line
        && $_ !~ /^\s*#/      # comment line
        && $_ !~ /^>/         # fasta header line
    } @fasta_file_data;

    # Strip the remaining whitespace (line ends, spaces inside lines).
    $sequence =~ s/\s//g;
    return $sequence;
}
# A Subroutine to Read FASTA Files
# get_file_data
#
# A subroutine to get data from a file given its filename
#
# Argument: $filename - path of the file to read.
# Returns:  the lines of the file, line endings included.
# If the file cannot be opened, a "Cannot open file" message goes to STDERR
# and the program exits.
sub get_file_data {
    my($filename) = @_;
    use strict;
    use warnings;

    # Three-argument open with an explicit read mode: the filename is taken
    # literally, so characters such as ">" or "|" in it cannot turn the call
    # into a write or a command pipe. As a consequence, surrounding whitespace
    # is no longer stripped, so chomp a filename read from input first.
    # The lexical handle replaces a global bareword handle.
    my $fh;
    unless( open($fh, '<', $filename) ) {
        print STDERR "Cannot open file \"$filename\"\n\n";
        exit;
    }
    my @filedata = <$fh>;
    close $fh;
    return @filedata;
}
# print_sequence
#
# Print a sequence to the currently selected output handle, broken into
# lines of a fixed width.
#
# Arguments: $sequence - the string to print.
#            $length   - number of characters per output line.
# Each piece is followed by a newline; the final line may be shorter.
sub print_sequence {
    my($sequence, $length) = @_;
    use strict;
    use warnings;

    my $total = length($sequence);
    my $offset = 0;
    while ($offset < $total) {
        print substr($sequence, $offset, $length), "\n";
        $offset += $length;
    }
}
# translate_frame
#
# Translate one reading frame of a DNA sequence.
#
# Arguments: $seq   - the DNA sequence.
#            $start - 1-based position of the first base to translate.
#            $end   - 1-based position of the last base to translate; when
#                     it is omitted (or otherwise false) the end of $seq is
#                     used.
# Returns:  the result of dna2peptide() on that stretch of the sequence.
sub translate_frame {
    my($seq, $start, $end) = @_;

    # Default the end point to the last base of the sequence.
    $end = length($seq) unless $end;

    # Convert the 1-based inclusive range into a substr offset and length.
    my $offset = $start - 1;
    my $span   = $end - $start + 1;
    return dna2peptide( substr($seq, $offset, $span) );
}
1;
##############################################################################
#######################eind SUB#################################################
# Translate the DNA to protein in six reading frames
# and print the protein in lines 70 characters long

# Frames 1-3: the sequence as given, starting at base 1, 2 and 3.
for my $frame (1 .. 3) {
    print "\n -------Reading Frame $frame--------\n\n";
    $protein = translate_frame($dna, $frame);
    print_sequence($protein, 70);
}

# Calculate reverse complement.
# No revcom() subroutine is defined in this script, so calling one fails at
# run time with "Undefined subroutine". Compute it here instead: reverse the
# string, then swap each base for its complement (both letter cases).
$revcom = reverse $dna;
$revcom =~ tr/ACGTacgt/TGCAtgca/;

# Frames 4-6: the reverse complement, starting at base 1, 2 and 3.
for my $frame (1 .. 3) {
    my $label = $frame + 3;
    print "\n -------Reading Frame $label--------\n\n";
    $protein = translate_frame($revcom, $frame);
    print_sequence($protein, 70);
}
exit;