BunnyMerah
Programmer
Hi,
I am a beginner and I would like to learn and explore sed, as it is a very powerful tool for writing one-liners that crunch huge amounts of data. I have thousands of lines of data that I am working on. Below is a sample:-
I need to find the pattern " ;" and wrap the affected column values in double quotes ("), to get the following output:-
I tried playing around with the following command and it almost worked.
but couldn't get the output that I wanted as above, where there should be a blank space after the field id. For instance:-
The result should be g_id "lnc-ZFHX3-27"; NOT g_id"lnc-ZFHX3-27";
Just need a little bit more help here. thanks
I am a beginner and I would like to learn and explore sed, as it is a very powerful tool for writing one-liners that crunch huge amounts of data. I have thousands of lines of data that I am working on. Below is a sample:-
Code:
chr1 OnePiece Loc 01516 03251 . - . g_id LINC01725 ; t_id LINC01725:44 ; g_alias_1 G233008 ; g_alias_2 RP11-475O6.1 ; g_alias_3 G233008.1 ; g_alias_4 9930.1 ; g_alias_5 G233008.5 ; g_alias_6 LINC01725 ; g_alias_7 LOC101927560 ; t_alias_1 E457273 ; t_alias_2 E457273.1 ; t_alias_3 RP11-475O6.1-005 ; t_alias_4 27496.1 ; t_alias_5 NONHSAT004171 ; t_alias_6 NR_119374 ; t_alias_7 E457273.5 ; t_alias_8 NR_119374.1 ;
chr1 OnePiece Loc 49907 50022 . - . g_id LINC01725 ; t_id LINC01725:44 ; g_alias_1 G233008 ; g_alias_2 RP11-475O6.1 ; g_alias_3 G233008.1 ; g_alias_4 9930.1 ; g_alias_5 G233008.5 ; g_alias_6 LINC01725 ; g_alias_7 LOC101927560 ; t_alias_1 E457273 ; t_alias_2 E457273.1 ; t_alias_3 RP11-475O6.1-005 ; t_alias_4 27496.1 ; t_alias_5 NONHSAT004171 ; t_alias_6 NR_119374 ; t_alias_7 E457273.5 ; t_alias_8 NR_119374.1 ;
chr1 OnePiece Loc 60408 60546 . - . g_id LINC01725 ; t_id LINC01725:44 ; g_alias_1 G233008 ; g_alias_2 RP11-475O6.1 ; g_alias_3 G233008.1 ; g_alias_4 9930.1 ; g_alias_5 G233008.5 ; g_alias_6 LINC01725 ; g_alias_7 LOC101927560 ; t_alias_1 E457273 ; t_alias_2 E457273.1 ; t_alias_3 RP11-475O6.1-005 ; t_alias_4 27496.1 ; t_alias_5 NONHSAT004171 ; t_alias_6 NR_119374 ; t_alias_7 E457273.5 ; t_alias_8 NR_119374.1 ;
chr16 OnePiece Loc 92392 92726 . - . g_id lnc-ZFHX3-27 ; t_id lnc-ZFHX3-27:11 ; g_alias_1 G249447 ; g_alias_2 XLOC_012007 ; g_alias_3 linc-ZFHX3-2 ; g_alias_4 G261404.1 ; g_alias_5 AC009120.4 ; g_alias_6 176255.2 ; g_alias_7 G261404.5 ; g_alias_8 G261404.6 ; g_alias_9 AC138627.1 ; g_alias_10 LOC101928035 ; t_alias_1 E510251 ; t_alias_2 TCONS_00024274 ; t_alias_3 E568137.1 ; t_alias_4 AC009120.4-001 ; t_alias_5 431686.1 ; t_alias_6 NONHSAT143655 ; t_alias_7 NR_104657 ; t_alias_8 NR_104657.1 ;
chr16 OnePiece Loc 05905 06165 . - . g_id lnc-ZFHX3-27 ; t_id lnc-ZFHX3-27:11 ; g_alias_1 G249447 ; g_alias_2 XLOC_012007 ; g_alias_3 linc-ZFHX3-2 ; g_alias_4 G261404.1 ; g_alias_5 AC009120.4 ; g_alias_6 176255.2 ; g_alias_7 G261404.5 ; g_alias_8 G261404.6 ; g_alias_9 AC138627.1 ; g_alias_10 LOC101928035 ; t_alias_1 E510251 ; t_alias_2 TCONS_00024274 ; t_alias_3 E568137.1 ; t_alias_4 AC009120.4-001 ; t_alias_5 431686.1 ; t_alias_6 NONHSAT143655 ; t_alias_7 NR_104657 ; t_alias_8 NR_104657.1 ;
chr16 OnePiece Loc 10306 10505 . - . g_id lnc-ZFHX3-27 ; t_id lnc-ZFHX3-27:11 ; g_alias_1 G249447 ; g_alias_2 XLOC_012007 ; g_alias_3 linc-ZFHX3-2 ; g_alias_4 G261404.1 ; g_alias_5 AC009120.4 ; g_alias_6 176255.2 ; g_alias_7 G261404.5 ; g_alias_8 G261404.6 ; g_alias_9 AC138627.1 ; g_alias_10 LOC101928035 ; t_alias_1 E510251 ; t_alias_2 TCONS_00024274 ; t_alias_3 E568137.1 ; t_alias_4 AC009120.4-001 ; t_alias_5 431686.1 ; t_alias_6 NONHSAT143655 ; t_alias_7 NR_104657 ; t_alias_8 NR_104657.1 ;
chr16 OnePiece Loc 15352 15521 . - . g_id lnc-ZFHX3-27 ; t_id lnc-ZFHX3-27:11 ; g_alias_1 G249447 ; g_alias_2 XLOC_012007 ; g_alias_3 linc-ZFHX3-2 ; g_alias_4 G261404.1 ; g_alias_5 AC009120.4 ; g_alias_6 176255.2 ; g_alias_7 G261404.5 ; g_alias_8 G261404.6 ; g_alias_9 AC138627.1 ; g_alias_10 LOC101928035 ; t_alias_1 E510251 ; t_alias_2 TCONS_00024274 ; t_alias_3 E568137.1 ; t_alias_4 AC009120.4-001 ; t_alias_5 431686.1 ; t_alias_6 NONHSAT143655 ; t_alias_7 NR_104657 ; t_alias_8 NR_104657.1 ;
I need to find the pattern " ;" and wrap the affected column values in double quotes ("), to get the following output:-
Code:
chr1 OnePiece Loc 01516 03251 . - . g_id "LINC01725"; t_id "LINC01725:44"; g_alias_1 "G233008"; g_alias_2 "RP11-475O6.1"; g_alias_3 "G233008.1"; g_alias_4 "9930.1"; g_alias_5 "G233008.5"; g_alias_6 "LINC01725"; g_alias_7 "LOC101927560"; t_alias_1 "E457273"; t_alias_2 "E457273.1"; t_alias_3 "RP11-475O6.1-005"; t_alias_4 "27496.1"; t_alias_5 "NONHSAT004171"; t_alias_6 "NR_119374"; t_alias_7 "E457273.5"; t_alias_8 "NR_119374.1";
chr1 OnePiece Loc 49907 50022 . - . g_id "LINC01725"; t_id "LINC01725:44"; g_alias_1 "G233008"; g_alias_2 "RP11-475O6.1"; g_alias_3 "G233008.1"; g_alias_4 "9930.1"; g_alias_5 "G233008.5"; g_alias_6 "LINC01725"; g_alias_7 "LOC101927560"; t_alias_1 "E457273"; t_alias_2 "E457273.1"; t_alias_3 "RP11-475O6.1-005"; t_alias_4 "27496.1"; t_alias_5 "NONHSAT004171"; t_alias_6 "NR_119374"; t_alias_7 "E457273.5"; t_alias_8 "NR_119374.1";
chr1 OnePiece Loc 60408 60546 . - . g_id "LINC01725"; t_id "LINC01725:44"; g_alias_1 "G233008"; g_alias_2 "RP11-475O6.1"; g_alias_3 "G233008.1"; g_alias_4 "9930.1"; g_alias_5 "G233008.5"; g_alias_6 "LINC01725"; g_alias_7 "LOC101927560"; t_alias_1 "E457273"; t_alias_2 "E457273.1"; t_alias_3 "RP11-475O6.1-005"; t_alias_4 "27496.1"; t_alias_5 "NONHSAT004171"; t_alias_6 "NR_119374"; t_alias_7 "E457273.5"; t_alias_8 "NR_119374.1";
chr16 OnePiece Loc 92392 92726 . - . g_id "lnc-ZFHX3-27"; t_id "lnc-ZFHX3-27:11"; g_alias_1 "G249447"; g_alias_2 "XLOC_012007"; g_alias_3 "linc-ZFHX3-2"; g_alias_4 "G261404.1"; g_alias_5 "AC009120.4"; g_alias_6 "176255.2"; g_alias_7 "G261404.5"; g_alias_8 "G261404.6"; g_alias_9 "AC138627.1"; g_alias_10 "LOC101928035"; t_alias_1 "E510251"; t_alias_2 "TCONS_00024274"; t_alias_3 "E568137.1"; t_alias_4 "AC009120.4-001"; t_alias_5 "431686.1"; t_alias_6 "NONHSAT143655"; t_alias_7 "NR_104657"; t_alias_8 "NR_104657.1";
chr16 OnePiece Loc 05905 06165 . - . g_id "lnc-ZFHX3-27"; t_id "lnc-ZFHX3-27:11"; g_alias_1 "G249447"; g_alias_2 "XLOC_012007"; g_alias_3 "linc-ZFHX3-2"; g_alias_4 "G261404.1"; g_alias_5 "AC009120.4"; g_alias_6 "176255.2"; g_alias_7 "G261404.5"; g_alias_8 "G261404.6"; g_alias_9 "AC138627.1"; g_alias_10 "LOC101928035"; t_alias_1 "E510251"; t_alias_2 "TCONS_00024274"; t_alias_3 "E568137.1"; t_alias_4 "AC009120.4-001"; t_alias_5 "431686.1"; t_alias_6 "NONHSAT143655"; t_alias_7 "NR_104657"; t_alias_8 "NR_104657.1";
chr16 OnePiece Loc 10306 10505 . - . g_id "lnc-ZFHX3-27"; t_id "lnc-ZFHX3-27:11"; g_alias_1 "G249447"; g_alias_2 "XLOC_012007"; g_alias_3 "linc-ZFHX3-2"; g_alias_4 "G261404.1"; g_alias_5 "AC009120.4"; g_alias_6 "176255.2"; g_alias_7 "G261404.5"; g_alias_8 "G261404.6"; g_alias_9 "AC138627.1"; g_alias_10 "LOC101928035"; t_alias_1 "E510251"; t_alias_2 "TCONS_00024274"; t_alias_3 "E568137.1"; t_alias_4 "AC009120.4-001"; t_alias_5 "431686.1"; t_alias_6 "NONHSAT143655"; t_alias_7 "NR_104657"; t_alias_8 "NR_104657.1";
chr16 OnePiece Loc 15352 15521 . - . g_id "lnc-ZFHX3-27"; t_id "lnc-ZFHX3-27:11"; g_alias_1 "G249447"; g_alias_2 "XLOC_012007"; g_alias_3 "linc-ZFHX3-2"; g_alias_4 "G261404.1"; g_alias_5 "AC009120.4"; g_alias_6 "176255.2"; g_alias_7 "G261404.5"; g_alias_8 "G261404.6"; g_alias_9 "AC138627.1"; g_alias_10 "LOC101928035"; t_alias_1 "E510251"; t_alias_2 "TCONS_00024274"; t_alias_3 "E568137.1"; t_alias_4 "AC009120.4-001"; t_alias_5 "431686.1"; t_alias_6 "NONHSAT143655"; t_alias_7 "NR_104657"; t_alias_8 "NR_104657.1";
I tried playing around with the following command and it almost worked.
Code:
# Wrap each attribute value that is followed by " ;" in double quotes and drop
# the space before the semicolon, keeping the space after the field id:
#   g_id lnc-ZFHX3-27 ;  ->  g_id "lnc-ZFHX3-27";
# The earlier attempt, sed 's/ /\"/g', replaced every space on the line with a
# quote, which is why the space between the field id and its value was lost.
# \([^ ;]*\) captures the value (a run without spaces or semicolons); the
# leading space in the pattern is written back before the opening quote.
sed 's/ \([^ ;]*\) ;/ "\1";/g' inputfile
but couldn't get the output that I wanted as above, where there should be a blank space after the field id. For instance:-
The result should be g_id "lnc-ZFHX3-27"; NOT g_id"lnc-ZFHX3-27";
Just need a little bit more help here. thanks