Tek-Tips is the largest IT community on the Internet today!

Members share and learn making Tek-Tips Forums the best source of peer-reviewed technical information on the Internet!

  • Congratulations Mike Lewis on being selected by the Tek-Tips community for having the most helpful posts in the forums last week. Way to Go!

Using sed to find matched patterns and append

Status
Not open for further replies.

BunnyMerah

Programmer
Sep 3, 2019
1
MY
Hi,

I am a beginner and would like to learn and explore sed, as it is a very powerful tool for crunching huge amounts of data with one-liners. I have thousands of records that I am working on. Below is a sample:

Code:
chr1	OnePiece	Loc	01516	03251	.	-	.	g_id LINC01725 ;	t_id LINC01725:44 ;	g_alias_1 G233008 ;	g_alias_2 RP11-475O6.1 ;	g_alias_3 G233008.1 ;	g_alias_4 9930.1 ;	g_alias_5 G233008.5 ;	g_alias_6 LINC01725 ;	g_alias_7 LOC101927560 ;	t_alias_1 E457273 ;	t_alias_2 E457273.1 ;	t_alias_3 RP11-475O6.1-005 ;	t_alias_4 27496.1 ;	t_alias_5 NONHSAT004171 ;	t_alias_6 NR_119374 ;	t_alias_7 E457273.5 ;	t_alias_8 NR_119374.1 ;
chr1	OnePiece	Loc	49907	50022	.	-	.	g_id LINC01725 ;	t_id LINC01725:44 ;	g_alias_1 G233008 ;	g_alias_2 RP11-475O6.1 ;	g_alias_3 G233008.1 ;	g_alias_4 9930.1 ;	g_alias_5 G233008.5 ;	g_alias_6 LINC01725 ;	g_alias_7 LOC101927560 ;	t_alias_1 E457273 ;	t_alias_2 E457273.1 ;	t_alias_3 RP11-475O6.1-005 ;	t_alias_4 27496.1 ;	t_alias_5 NONHSAT004171 ;	t_alias_6 NR_119374 ;	t_alias_7 E457273.5 ;	t_alias_8 NR_119374.1 ;
chr1	OnePiece	Loc	60408	60546	.	-	.	g_id LINC01725 ;	t_id LINC01725:44 ;	g_alias_1 G233008 ;	g_alias_2 RP11-475O6.1 ;	g_alias_3 G233008.1 ;	g_alias_4 9930.1 ;	g_alias_5 G233008.5 ;	g_alias_6 LINC01725 ;	g_alias_7 LOC101927560 ;	t_alias_1 E457273 ;	t_alias_2 E457273.1 ;	t_alias_3 RP11-475O6.1-005 ;	t_alias_4 27496.1 ;	t_alias_5 NONHSAT004171 ;	t_alias_6 NR_119374 ;	t_alias_7 E457273.5 ;	t_alias_8 NR_119374.1 ;
chr16	OnePiece	Loc	92392	92726	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ;
chr16	OnePiece	Loc	05905	06165	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ;
chr16	OnePiece	Loc	10306	10505	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ;
chr16	OnePiece	Loc	15352	15521	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ;

I need to find the pattern " ;" and wrap the affected values in double quotes ("), so that the output looks as follows:

Code:
chr1	OnePiece	Loc	01516	03251	.	-	.	g_id "LINC01725";	t_id "LINC01725:44";	g_alias_1 "G233008";	g_alias_2 "RP11-475O6.1";	g_alias_3 "G233008.1";	g_alias_4 "9930.1";	g_alias_5 "G233008.5";	g_alias_6 "LINC01725";	g_alias_7 "LOC101927560";	t_alias_1 "E457273";	t_alias_2 "E457273.1";	t_alias_3 "RP11-475O6.1-005";	t_alias_4 "27496.1";	t_alias_5 "NONHSAT004171";	t_alias_6 "NR_119374";	t_alias_7 "E457273.5";	t_alias_8 "NR_119374.1";
chr1	OnePiece	Loc	49907	50022	.	-	.	g_id "LINC01725";	t_id "LINC01725:44";	g_alias_1 "G233008";	g_alias_2 "RP11-475O6.1";	g_alias_3 "G233008.1";	g_alias_4 "9930.1";	g_alias_5 "G233008.5";	g_alias_6 "LINC01725";	g_alias_7 "LOC101927560";	t_alias_1 "E457273";	t_alias_2 "E457273.1";	t_alias_3 "RP11-475O6.1-005";	t_alias_4 "27496.1";	t_alias_5 "NONHSAT004171";	t_alias_6 "NR_119374";	t_alias_7 "E457273.5";	t_alias_8 "NR_119374.1";
chr1	OnePiece	Loc	60408	60546	.	-	.	g_id "LINC01725";	t_id "LINC01725:44";	g_alias_1 "G233008";	g_alias_2 "RP11-475O6.1";	g_alias_3 "G233008.1";	g_alias_4 "9930.1";	g_alias_5 "G233008.5";	g_alias_6 "LINC01725";	g_alias_7 "LOC101927560";	t_alias_1 "E457273";	t_alias_2 "E457273.1";	t_alias_3 "RP11-475O6.1-005";	t_alias_4 "27496.1";	t_alias_5 "NONHSAT004171";	t_alias_6 "NR_119374";	t_alias_7 "E457273.5";	t_alias_8 "NR_119374.1";
chr16	OnePiece	Loc	92392	92726	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1";
chr16	OnePiece	Loc	05905	06165	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1";
chr16	OnePiece	Loc	10306	10505	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1";
chr16	OnePiece	Loc	15352	15521	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1";

I tried playing around with the following command, and it almost worked:
Code:
sed 's/ /\"/g' inputfile

but couldn't get the output that I wanted as above, where there should be a blank space after the field id. For instance:-

The result should be g_id "lnc-ZFHX3-27"; NOT g_id"lnc-ZFHX3-27";

Just need a little bit more help here. thanks
 
I suggest you post in one of these other forums, as your question is not AIX specific; you will get more people's eyes on it. One is for awk, which is a close relative to sed. The other is for General Unix Scripting.




==================================
advanced cognitive capabilities and other marketing buzzwords explained with sarcastic simplicity
 
Status
Not open for further replies.

Part and Inventory Search

Sponsor

Back
Top