0210828176
Programmer
- Apr 23, 2012
- 1
Hi I am new to this forum. I had a query regarding converting DNA code to proteins... which is simple by explanation but I find it hard to get accurate results with code. For ex: In a code like ATGTACTAT (here every 3 non overlapping alphabets get replaced by a single alphabet. For ex: ATG -> M; TAC-> Y; TAT -> Y. I am very new to AWK, I tried making a code below but it doesnt work accurately. can u fix it? thanks in advance
awk 'BEGIN{
c["ATG"]="M"; c["TTT"]="F"; c["TTC"]="F"; c["TTA"]="L"; c["TTG"]="L"; c["CTT"]="L"; c["CTC"]="L"; c["CTA"]="L"; c["CTG"]="L"; c["ATT"]="I"; c["ATC"]="I";
c["ATA"]="I"; c["GTT"]="V"; c["GTC"]="V"; c["GTA"]="V"; c["GTG"]="V"; c["TCT"]="S"; c["TCC"]="S"; c["TCA"]="S"; c["TCG"]="S"; c["CCT"]="P"; c["CCC"]="P";
c["CCA"]="P"; c["CCG"]="P"; c["ACT"]="T"; c["ACC"]="T"; c["ACA"]="T"; c["ACG"]="T"; c["GCT"]="A"; c["GCC"]="A"; c["GCA"]="A"; c["GCG"]="A";c["TAT"]="Y";
c["TAC"]="Y"; c["CAT"]="H"; c["CAC"]="H"; c["CAA"]="Q"; c["CAG"]="Q"; c["AAT"]="N"; c["AAC"]="N"; c["AAA"]="K"; c["AAG"]="K"; c["GAT"]="D"; c["GAC"]="D";
c["GAA"]="E"; c["GAG"]="E"; c["TGT"]="C"; c["TGC"]="C"; c["TGG"]="W"; c["CGT"]=R; c["CGC"]=R; c["CGA"]=R; c["CGG"]=R; c["AGA"]=R; c["AGG"]=R; c["AGT"]="S";
c["AGC"]="S"; c["GGT"]="G"; c["GGC"]="G"; c["GGA"]="G"; c["GGG"]="G";}
{i=1; p=""}
{do {
s=substr($0,i,3)
printf ("%s",s)
{if (c==""){p=p" "} else {p=p c" "}}
i=i+3}
while (s!="")}
{printf("\n%s\n",p)} ' genes_contig0028.txt
awk 'BEGIN{
c["ATG"]="M"; c["TTT"]="F"; c["TTC"]="F"; c["TTA"]="L"; c["TTG"]="L"; c["CTT"]="L"; c["CTC"]="L"; c["CTA"]="L"; c["CTG"]="L"; c["ATT"]="I"; c["ATC"]="I";
c["ATA"]="I"; c["GTT"]="V"; c["GTC"]="V"; c["GTA"]="V"; c["GTG"]="V"; c["TCT"]="S"; c["TCC"]="S"; c["TCA"]="S"; c["TCG"]="S"; c["CCT"]="P"; c["CCC"]="P";
c["CCA"]="P"; c["CCG"]="P"; c["ACT"]="T"; c["ACC"]="T"; c["ACA"]="T"; c["ACG"]="T"; c["GCT"]="A"; c["GCC"]="A"; c["GCA"]="A"; c["GCG"]="A";c["TAT"]="Y";
c["TAC"]="Y"; c["CAT"]="H"; c["CAC"]="H"; c["CAA"]="Q"; c["CAG"]="Q"; c["AAT"]="N"; c["AAC"]="N"; c["AAA"]="K"; c["AAG"]="K"; c["GAT"]="D"; c["GAC"]="D";
c["GAA"]="E"; c["GAG"]="E"; c["TGT"]="C"; c["TGC"]="C"; c["TGG"]="W"; c["CGT"]=R; c["CGC"]=R; c["CGA"]=R; c["CGG"]=R; c["AGA"]=R; c["AGG"]=R; c["AGT"]="S";
c["AGC"]="S"; c["GGT"]="G"; c["GGC"]="G"; c["GGA"]="G"; c["GGG"]="G";}
{i=1; p=""}
{do {
s=substr($0,i,3)
printf ("%s",s)
{if (c
i=i+3}
while (s!="")}
{printf("\n%s\n",p)} ' genes_contig0028.txt