Looks like a csv file. Here's a complete csv parser I wrote in Awk recently:
Code:
# Driver: pull CSV records from the main input one at a time and print each
# record's fields as "[<f1>, <f2>, ...]", one record per output line.
BEGIN {
    while ( get_rec( rec ) ) {
        # print CSV_STR
        line = ""
        # rec is indexed 1..n with no gaps, so walk until the index runs out.
        for ( n = 1; n in rec; n++ )
            line = line ( n > 1 ? ", " : "" ) "<" rec[n] ">"
        print "[" line "]"
    }
}
# parse_csv( str, array ) -- split one CSV record into array[1..n].
#
#   str    Text of a single record, without its terminating newline.  A field
#          may be wrapped in double quotes, in which case it may contain
#          commas and newlines, and a literal quote is written as "".
#   array  Emptied first, then filled with the decoded fields, indexed from 1.
#
# Returns the number of fields stored (always at least 1; an empty record
# yields a single empty field).
#
# Blanks and tabs around a field (outside any quotes) are discarded, the
# enclosing quotes are removed, and every "" pair is collapsed to one quote.
# field and i are locals.
function parse_csv( str, array,    field,i )
{ split( "", array )
  # A record taken from a CRLF-terminated file still carries the CR.  Left in
  # place it stops a quoted last field from matching (the closing quote must
  # be followed by blanks and a comma), so drop it before parsing.
  sub( /\r$/, "", str )
  # Terminate the last field with a comma so every field has the same shape.
  str = str ","
  # The [^,]* alternative can always match from the first character, so the
  # leftmost match starts at position 1 and RLENGTH is the length of the
  # leading field including its comma.
  while ( match(str,
          /[ \t]*("[^"]*(""[^"]*)*"|[^,]*)[ \t]*,/) )
  { field = substr( str, 1, RLENGTH )
    # Trim leading blanks + opening quote, and closing quote + blanks + comma.
    gsub( /^[ \t]*"?|"?[ \t]*,$/, "", field )
    # Undo quote doubling.
    gsub( /""/, "\"", field )
    array[++i] = field
    str = substr( str, RLENGTH + 1 )
  }
  return i + 0
}
# get_rec( rec [, file] ) -- read the next CSV record and parse it into rec.
#
# Handles records that contain linefeeds: physical lines are appended until
# the accumulated text holds an even number of double quotes, i.e. no quoted
# field is still open.
#
#   rec   Array that receives the fields (filled by parse_csv).
#   file  Optional file name to read from; when empty, the main input is read
#         with a plain getline.
#
# Returns 1 when a record was read, 0 at end of input or on a read error.
# Sets the global CSV_STR to the raw record text (embedded newlines kept,
# final newline removed).  Exits with status 1 if the input ends in the
# middle of a quoted field.
# result, line, str and quotes are locals.
function get_rec( rec, file ,    result,line,str,quotes)
{ do
  { if (file)
      result = getline line <file
    else
      result = getline line
    if ( result < 1 )
    { if ( length(str) )
      { print "The csv file is malformed." >"/dev/stderr"
        exit 1
      }
      # getline yields -1 on a read error (e.g. an unreadable file); report it
      # instead of passing it off as a normal end of input.  Still return 0 so
      # that "while ( get_rec(...) )" loops terminate as before.
      if ( result < 0 )
        printf "Cannot read %s.\n", ( file ? file : "the main input" ) >"/dev/stderr"
      return 0
    }
    str = str line "\n"
    # Count only the quotes of the new line; replacing with "&" leaves the
    # line unchanged and avoids rescanning the whole record every time.
    quotes += gsub( /"/, "&", line )
  } # Loop until number of quotes is even.
  while ( quotes % 2 )
  CSV_STR = substr( str, 1, length(str) - 1)
  parse_csv( CSV_STR, rec )
  return 1
}
With this input
John,Doe,120 jefferson st.,Riverside, NJ , 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside,NJ ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298,
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123
the output is
[<John>, <Doe>, <120 jefferson st.>, <Riverside>, <NJ>, <08075>]
[<Jack>, <McGinnis>, <220 hobo Av.>, <Phila>, <PA>, <09119>]
[<John "Da Man">, <Repici>, <120 Jefferson St.>, <Riverside>, <NJ>, <08075>]
[<Stephen>, <Tyler>, <7452 Terrace "At the Plaza" road>, <SomeTown>, <SD>, <91234>]
[<>, <Blankman>, <>, <SomeTown>, <SD>, <00298>, <>]
[<Joan "the bone", Anne>, <Jet>, <9th, at Terrace plc>, <Desert City>, <CO>, <00123>]