Sorry bout posting this in another post, but replying didn't work. I fixed the first problem, but now I don't know how to access the hashes within hashes. How could I get a list for each amino acid that goes like...
and this is for research, but not the programing part
Thanks for any help that any of you can offer
Name
Number of codons (I've got these parts so far)
Codon A: number of occurrences
Codon B: number of occurrences
Codon C: number of occurrences (if these exist)
I don't know how to extract that information.
Thanks for any help
use Data:umper;
##########################################
# Load Sequence
##########################################
my $DNA;
$DNA = "ATGACCCCAATACGCAAAACTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACTCCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATAATAAT";
##########################################
# Populate Data Set
##########################################
my %MasterHash = ();
my $AA_Count = 0;
my $CD_Occurences = 0;
for(my $i=0; $i < (length($DNA) - 2) ; $i += 3) {
my $Codon = substr($DNA,$i,3);
my $AA = codon2aa( $Codon );
print "\nAmino Acid: $AA";
print "\nCodon: $Codon\n";
### Setting up AA count
if ($MasterHash{$AA}) { #tests whether AA key has already been created
$MasterHash{$AA}{Count}++; #if so, add to AA count
} else { #if not...
$MasterHash{$AA}{Symbol} = $AA; #set up symbol
$MasterHash{$AA}{Name} = nameAA($AA); #set up name
$MasterHash{$AA}{Count} = 1; #set up AA count
}
print "Amino Acid Count: $MasterHash{$AA}{Count}\n";
### Settin up Codon Count
if ($MasterHash{$AA}{Codons}{$Codon}) { #tests whether codon key has already been created
$MasterHash{$AA}{Codons}{$Codon}{Count}++; #if so, add to codon count
} else { #if not...
$MasterHash{$AA}{Codons}{$Codon}{Count} = 1; #set up codon count
}
print "Codon Count: $MasterHash{$AA}{Codons}{$Codon}{Count}\n";
# %MasterHash = (
#
# $AA => {
# Symbol => $AA,
# Name => nameAA($AA),
# Count => $AA_Count,
# Codons => {
# $Codon => {
# Count => $CD_Count,
# },
# },
# },
# );
}
##########################################
# Print Data Set
##########################################
print "\n\nRESULTS...\n\n";
while( my ($var_AA, $var_unknown) = each (%MasterHash)) {
my $AA_Name = nameAA($var_AA);
print "$AA_Name\n";
print "unknown: $var_unknown\n";
print "AA Count: ";
print $MasterHash{$var_AA}->{Count};
print "\nCodons: ";
print $MasterHash{$var_AA}->{Codons};
print "\n\n";
}
#print Dumper(%MasterHash);
##########################################
# Subroutines
##########################################
sub codon2aa {
my($codon) = @_;
$codon = uc $codon;
my(%genetic_code) = (
'TCA' => 'S', # Serine
'TCC' => 'S', # Serine
'TCG' => 'S', # Serine
'TCT' => 'S', # Serine
'TTC' => 'F', # Phenylalanine
'TTT' => 'F', # Phenylalanine
'TTA' => 'L', # Leucine
'TTG' => 'L', # Leucine
'TAC' => 'Y', # Tyrosine
'TAT' => 'Y', # Tyrosine
'TAA' => '_', # Stop
'TAG' => '_', # Stop
'TGC' => 'C', # Cysteine
'TGT' => 'C', # Cysteine
'TGA' => '_', # Stop
'TGG' => 'W', # Tryptophan
'CTA' => 'L', # Leucine
'CTC' => 'L', # Leucine
'CTG' => 'L', # Leucine
'CTT' => 'L', # Leucine
'CCA' => 'P', # Proline
'CCC' => 'P', # Proline
'CCG' => 'P', # Proline
'CCT' => 'P', # Proline
'CAC' => 'H', # Histidine
'CAT' => 'H', # Histidine
'CAA' => 'Q', # Glutamine
'CAG' => 'Q', # Glutamine
'CGA' => 'R', # Arginine
'CGC' => 'R', # Arginine
'CGG' => 'R', # Arginine
'CGT' => 'R', # Arginine
'ATA' => 'I', # Isoleucine
'ATC' => 'I', # Isoleucine
'ATT' => 'I', # Isoleucine
'ATG' => 'M', # Methionine
'ACA' => 'T', # Threonine
'ACC' => 'T', # Threonine
'ACG' => 'T', # Threonine
'ACT' => 'T', # Threonine
'AAC' => 'N', # Asparagine
'AAT' => 'N', # Asparagine
'AAA' => 'K', # Lysine
'AAG' => 'K', # Lysine
'AGC' => 'S', # Serine
'AGT' => 'S', # Serine
'AGA' => 'R', # Arginine
'AGG' => 'R', # Arginine
'GTA' => 'V', # Valine
'GTC' => 'V', # Valine
'GTG' => 'V', # Valine
'GTT' => 'V', # Valine
'GCA' => 'A', # Alanine
'GCC' => 'A', # Alanine
'GCG' => 'A', # Alanine
'GCT' => 'A', # Alanine
'GAC' => 'D', # Aspartic Acid
'GAT' => 'D', # Aspartic Acid
'GAA' => 'E', # Glutamic Acid
'GAG' => 'E', # Glutamic Acid
'GGA' => 'G', # Glycine
'GGC' => 'G', # Glycine
'GGG' => 'G', # Glycine
'GGT' => 'G', # Glycine
);
if(exists $genetic_code{$codon}) {
return $genetic_code{$codon};
}else{
print STDERR "Bad codon \"$codon\"!!\n";
#exit;
}
}
sub nameAA {
my($AA_single) = @_;
$AA_single = uc $AA_single;
my(%AA_Name_code) = (
'S' => 'Serine',
'F' => 'Phenylalanine',
'L' => 'Leucine',
'Y' => 'Tyrosine',
'_' => 'Stop',
'C' => 'Cysteine',
'W' => 'Tryptophan',
'P' => 'Proline',
'H' => 'Histidine',
'Q' => 'Glutamine',
'R' => 'Arginine',
'I' => 'Isoleucine',
'M' => 'Methionine',
'T' => 'Threonine',
'N' => 'Asparagine',
'K' => 'Lysine',
'V' => 'Valine',
'A' => 'Alanine',
'D' => 'Aspartic Acid',
'E' => 'Glutamic Acid',
'G' => 'Glycine',
);
if(exists $AA_Name_code{$AA_single}) {
return $AA_Name_code{$AA_single};
}else{
print STDERR "Bad codon \"$codon\"!!\n";
#exit;
}
}
and this is for research, but not the programing part
Thanks for any help that any of you can offer
Name
Number of codons (I've got these parts so far)
Codon A: number of occurrences
Codon B: number of occurrences
Codon C: number of occurrences (if these exist)
I don't know how to extract that information.
Thanks for any help
use Data:umper;
##########################################
# Load Sequence
##########################################
my $DNA;
$DNA = "ATGACCCCAATACGCAAAACTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACTCCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATAATAAT";
##########################################
# Populate Data Set
##########################################
my %MasterHash = ();
my $AA_Count = 0;
my $CD_Occurences = 0;
for(my $i=0; $i < (length($DNA) - 2) ; $i += 3) {
my $Codon = substr($DNA,$i,3);
my $AA = codon2aa( $Codon );
print "\nAmino Acid: $AA";
print "\nCodon: $Codon\n";
### Setting up AA count
if ($MasterHash{$AA}) { #tests whether AA key has already been created
$MasterHash{$AA}{Count}++; #if so, add to AA count
} else { #if not...
$MasterHash{$AA}{Symbol} = $AA; #set up symbol
$MasterHash{$AA}{Name} = nameAA($AA); #set up name
$MasterHash{$AA}{Count} = 1; #set up AA count
}
print "Amino Acid Count: $MasterHash{$AA}{Count}\n";
### Settin up Codon Count
if ($MasterHash{$AA}{Codons}{$Codon}) { #tests whether codon key has already been created
$MasterHash{$AA}{Codons}{$Codon}{Count}++; #if so, add to codon count
} else { #if not...
$MasterHash{$AA}{Codons}{$Codon}{Count} = 1; #set up codon count
}
print "Codon Count: $MasterHash{$AA}{Codons}{$Codon}{Count}\n";
# %MasterHash = (
#
# $AA => {
# Symbol => $AA,
# Name => nameAA($AA),
# Count => $AA_Count,
# Codons => {
# $Codon => {
# Count => $CD_Count,
# },
# },
# },
# );
}
##########################################
# Print Data Set
##########################################
print "\n\nRESULTS...\n\n";
while( my ($var_AA, $var_unknown) = each (%MasterHash)) {
my $AA_Name = nameAA($var_AA);
print "$AA_Name\n";
print "unknown: $var_unknown\n";
print "AA Count: ";
print $MasterHash{$var_AA}->{Count};
print "\nCodons: ";
print $MasterHash{$var_AA}->{Codons};
print "\n\n";
}
#print Dumper(%MasterHash);
##########################################
# Subroutines
##########################################
sub codon2aa {
my($codon) = @_;
$codon = uc $codon;
my(%genetic_code) = (
'TCA' => 'S', # Serine
'TCC' => 'S', # Serine
'TCG' => 'S', # Serine
'TCT' => 'S', # Serine
'TTC' => 'F', # Phenylalanine
'TTT' => 'F', # Phenylalanine
'TTA' => 'L', # Leucine
'TTG' => 'L', # Leucine
'TAC' => 'Y', # Tyrosine
'TAT' => 'Y', # Tyrosine
'TAA' => '_', # Stop
'TAG' => '_', # Stop
'TGC' => 'C', # Cysteine
'TGT' => 'C', # Cysteine
'TGA' => '_', # Stop
'TGG' => 'W', # Tryptophan
'CTA' => 'L', # Leucine
'CTC' => 'L', # Leucine
'CTG' => 'L', # Leucine
'CTT' => 'L', # Leucine
'CCA' => 'P', # Proline
'CCC' => 'P', # Proline
'CCG' => 'P', # Proline
'CCT' => 'P', # Proline
'CAC' => 'H', # Histidine
'CAT' => 'H', # Histidine
'CAA' => 'Q', # Glutamine
'CAG' => 'Q', # Glutamine
'CGA' => 'R', # Arginine
'CGC' => 'R', # Arginine
'CGG' => 'R', # Arginine
'CGT' => 'R', # Arginine
'ATA' => 'I', # Isoleucine
'ATC' => 'I', # Isoleucine
'ATT' => 'I', # Isoleucine
'ATG' => 'M', # Methionine
'ACA' => 'T', # Threonine
'ACC' => 'T', # Threonine
'ACG' => 'T', # Threonine
'ACT' => 'T', # Threonine
'AAC' => 'N', # Asparagine
'AAT' => 'N', # Asparagine
'AAA' => 'K', # Lysine
'AAG' => 'K', # Lysine
'AGC' => 'S', # Serine
'AGT' => 'S', # Serine
'AGA' => 'R', # Arginine
'AGG' => 'R', # Arginine
'GTA' => 'V', # Valine
'GTC' => 'V', # Valine
'GTG' => 'V', # Valine
'GTT' => 'V', # Valine
'GCA' => 'A', # Alanine
'GCC' => 'A', # Alanine
'GCG' => 'A', # Alanine
'GCT' => 'A', # Alanine
'GAC' => 'D', # Aspartic Acid
'GAT' => 'D', # Aspartic Acid
'GAA' => 'E', # Glutamic Acid
'GAG' => 'E', # Glutamic Acid
'GGA' => 'G', # Glycine
'GGC' => 'G', # Glycine
'GGG' => 'G', # Glycine
'GGT' => 'G', # Glycine
);
if(exists $genetic_code{$codon}) {
return $genetic_code{$codon};
}else{
print STDERR "Bad codon \"$codon\"!!\n";
#exit;
}
}
sub nameAA {
my($AA_single) = @_;
$AA_single = uc $AA_single;
my(%AA_Name_code) = (
'S' => 'Serine',
'F' => 'Phenylalanine',
'L' => 'Leucine',
'Y' => 'Tyrosine',
'_' => 'Stop',
'C' => 'Cysteine',
'W' => 'Tryptophan',
'P' => 'Proline',
'H' => 'Histidine',
'Q' => 'Glutamine',
'R' => 'Arginine',
'I' => 'Isoleucine',
'M' => 'Methionine',
'T' => 'Threonine',
'N' => 'Asparagine',
'K' => 'Lysine',
'V' => 'Valine',
'A' => 'Alanine',
'D' => 'Aspartic Acid',
'E' => 'Glutamic Acid',
'G' => 'Glycine',
);
if(exists $AA_Name_code{$AA_single}) {
return $AA_Name_code{$AA_single};
}else{
print STDERR "Bad codon \"$codon\"!!\n";
#exit;
}
}