Hi,
I recently just started writing awk. I managed to create a script, that takes two files, creates arrays, searches them and finally prints. Unfortunately my script tends to take very long time when I use large data files.
Is there someone who can help me speed things up?
Script:
#!/bin/awk -f
BEGIN {
i = 1
while ( (getline < "file2.txt") > 0 )
{
fromf2[i,1] = $1
fromf2[i,4] = $4
fromf2[i,5] = $5
fromf2[i,6] = $6
fromf2[i,7] = $7
i = i +1
}
}
{
oli1 = $1 ;
oli2 = $2 ;
str1 = $3 ;
str2 = $4 ;
str3 = $5 ;
checker = 0 ;
for (x = 1; x <= i; x++) {
look0 = fromf2[x,1]
look1 = fromf2[x,4]
look2 = fromf2[x,5]
look3 = fromf2[x,6]
look4 = fromf2[x,7]
if ((str1 == look1 || str1 == look2 || str1 == look3 || str1 == look4) && (str2 == look1 || str2 == look2 || str2 == look3 || str2 == look4) && (str3 == look1 || str3 == look2 || str3 == look3 || str3 == look4))
{
print oli1" "oli2" "look0" "0" "303" "str1" "str2" "str3
checker = 1
}
}
if ( checker == 0)
{
print oli1" "oli2" "0" "0" "303" "str1" "str2" "str3
}
}
And file1 looks like:
1 {test} 2944 2945 2950
2 {test} 2945 2950 2951
3 {test} 2945 2946 2951
4 {test} 2946 2951 2952
5 {test} 2946 2947 2952
6 {test} 2947 2952 2953
7 {test} 2947 2948 2953
8 {test} 2948 2953 2954
9 {test} 2948 2949 2954
And file2 looks like:
23473 {first second} 1313 2622 11965 16211
23474 {first second} 11130 11131 11359 15159
23475 {first second} 2944 2945 2950 17152
23476 {first second} 9423 9424 9434 16767
23477 {first second} 1795 1945 11486 16613
23478 {first second} 4317 2953 2948 2947
Thanks,
Robert
I recently just started writing awk. I managed to create a script, that takes two files, creates arrays, searches them and finally prints. Unfortunately my script tends to take very long time when I use large data files.
Is there someone who can help me speed things up?
Script:
#!/bin/awk -f
BEGIN {
i = 1
while ( (getline < "file2.txt") > 0 )
{
fromf2[i,1] = $1
fromf2[i,4] = $4
fromf2[i,5] = $5
fromf2[i,6] = $6
fromf2[i,7] = $7
i = i +1
}
}
{
oli1 = $1 ;
oli2 = $2 ;
str1 = $3 ;
str2 = $4 ;
str3 = $5 ;
checker = 0 ;
for (x = 1; x <= i; x++) {
look0 = fromf2[x,1]
look1 = fromf2[x,4]
look2 = fromf2[x,5]
look3 = fromf2[x,6]
look4 = fromf2[x,7]
if ((str1 == look1 || str1 == look2 || str1 == look3 || str1 == look4) && (str2 == look1 || str2 == look2 || str2 == look3 || str2 == look4) && (str3 == look1 || str3 == look2 || str3 == look3 || str3 == look4))
{
print oli1" "oli2" "look0" "0" "303" "str1" "str2" "str3
checker = 1
}
}
if ( checker == 0)
{
print oli1" "oli2" "0" "0" "303" "str1" "str2" "str3
}
}
And file1 looks like:
1 {test} 2944 2945 2950
2 {test} 2945 2950 2951
3 {test} 2945 2946 2951
4 {test} 2946 2951 2952
5 {test} 2946 2947 2952
6 {test} 2947 2952 2953
7 {test} 2947 2948 2953
8 {test} 2948 2953 2954
9 {test} 2948 2949 2954
And file2 looks like:
23473 {first second} 1313 2622 11965 16211
23474 {first second} 11130 11131 11359 15159
23475 {first second} 2944 2945 2950 17152
23476 {first second} 9423 9424 9434 16767
23477 {first second} 1795 1945 11486 16613
23478 {first second} 4317 2953 2948 2947
Thanks,
Robert