Hello,
I'm coming across this odd issue where every so often (really sporadically), my p5 510 running a 1.9GHz 1-way with 5GB of RAM on AIX 5.3 ML04 will run in a super slow state. It always happens on a weekday, always between 9:15AM - 10:00AM, and the only fix seems to be a reboot. I try to run commands during this state, but the response time is too long for me to wait since this is a Production machine. I run my commands from the console too, but it still takes too long. I can ping the server, but my Oracle apps can't connect.
I can't really tell if the system is 'Thrashing', but isn't that a tell-tale sign of a system that takes forever to respond?
The SGA size is 1.8GB.
I am using CIO mounted filesystems for Oracle. Since I'm using CIO, isn't tweaking the vmo values moot?
I did a test yesterday with our DBA and had him run several SQL statements that performed reads/writes against our Oracle database, and there were periods of high 'po' values in vmstat when they were all run at the same time.
In the /etc/security/limits file, here is what I have:
oracle:
fsize = -1
data = -1
rss = -1
stack = -1
cpu = -1
nofiles = -1
This pretty much allows the oracle user unlimited usage of resources, including memory. I was told by Oracle to do this. What do you guys think of putting a hard limit via rss_hard instead, so oracle can't usurp all of the memory?
Here are some statistics from the system during the window I've specified above.
# vmstat -I 2
kthr memory page faults cpu
-------- ----------- ------------------------ ------------ -----------
r b p avm fre fi fo pi po fr sr in sy cs us sy id wa
1 0 0 1269791 2305 271 25 150 0 183 5736 281 2081 616 22 3 37 39
0 1 0 1269826 3601 250 73 174 60 1128 32260 400 4178 920 17 5 37 41
2 1 0 1264021 8311 410 6 137 0 0 0 285 2690 729 17 2 39 41
0 1 0 1262058 9109 410 32 154 0 0 0 341 2006 737 14 3 41 43
1 1 0 1265977 4259 327 4 138 0 0 0 270 2932 594 22 3 37 38
0 1 0 1270081 2322 301 8 162 27 1546 34085 577 4924 1136 17 5 38 40
1 1 0 1270085 3823 310 22 164 44 1260 18072 442 3102 957 14 4 40 42
1 1 0 1264271 8398 485 26 145 0 0 0 285 1333 648 9 2 43 45
1 1 0 1262308 9653 206 7 148 0 0 0 256 1993 565 23 2 37 38
1 0 0 1266088 5085 271 30 104 0 0 0 259 2841 563 39 3 28 31
2 0 0 1264067 8189 561 11 78 31 1187 23368 352 6659 754 58 6 17 19
5 0 0 1272173 3331 801 9 72 574 2266 32042 455 9520 720 42 7 24 27
1 1 0 1266288 7477 807 6 62 0 0 0 280 2056 576 35 2 30 32
1 1 0 1266024 6482 589 5 37 0 0 0 582 6605 1196 51 4 22 23
2 0 0 1270404 2423 562 11 45 137 769 5268 648 10259 1271 52 7 20 21
0 1 0 1274318 2226 682 6 48 1543 2633 3702 488 4457 585 39 7 27 28
0 1 0 1269710 6955 60 17 40 129 129 151 546 9169 1115 28 6 54 12
# svmon -G
size inuse free pin virtual
memory 1261568 1245228 16340 132874 1188198
pg space 3637248 254426
work pers clnt
pin 132874 0 0
in use 1024221 0 221007
PageSize PoolSize inuse pgsp pin virtual
s 4 KB - 1194956 238202 100378 1122966
m 64 KB - 3142 1014 2031 4077
#
# lsps -a
Page Space Physical Volume Volume Group Size %Used Active Auto Type
paging00 hdisk3 oraclevg 8192MB 8 yes yes lv
hd6 hdisk1 rootvg 6016MB 7 yes yes lv
#
# vmo -a
cpu_scale_memp = 8
data_stagger_interval = 161
defps = 1
force_relalias_lite = 0
framesets = 2
htabscale = n/a
kernel_heap_psize = 4096
large_page_heap_size = 0
lgpg_regions = 0
lgpg_size = 0
low_ps_handling = 1
lru_file_repage = 0
lru_poll_interval = 10
lrubucket = 131072
maxclient% = 80
maxfree = 1088
maxperm = 968863
maxperm% = 80
maxpin = 1018607
maxpin% = 80
mbuf_heap_psize = 65536
memory_affinity = 1
memory_frames = 1261568
memplace_data = 2
memplace_mapped_file = 2
memplace_shm_anonymous = 2
memplace_shm_named = 2
memplace_stack = 2
memplace_text = 2
memplace_unmapped_file = 2
mempools = 1
minfree = 960
minperm = 242215
minperm% = 20
nokilluid = 0
npskill = 36608
npsrpgmax = 292864
npsrpgmin = 219648
npsscrubmax = 292864
npsscrubmin = 219648
npswarn = 146432
num_spec_dataseg = 0
numpsblks = 4685824
page_steal_method = 0
pagecoloring = n/a
pinnable_frames = 1144691
npsrpgmin = 219648
npsscrubmax = 292864
npsscrubmin = 219648
npswarn = 146432
num_spec_dataseg = 0
numpsblks = 4685824
page_steal_method = 0
pagecoloring = n/a
pinnable_frames = 1144691
pta_balance_threshold = n/a
relalias_percentage = 0
rpgclean = 0
rpgcontrol = 2
scrub = 0
scrubclean = 0
soft_min_lgpgs_vmpool = 0
spec_dataseg_int = 512
strict_maxclient = 1
strict_maxperm = 0
v_pinshm = 0
vm_modlist_threshold = -1
vmm_fork_policy = 1
vmm_mpsize_support = 1
I just think we have too little physical memory for our workload, but I would love to hear what you guys think and how your AIX/Oracle environments are tuned.
Thanks!
I'm coming across this odd issue where every so often (really sporadically), my p5 510 running a 1.9GHz 1-way with 5GB of RAM on AIX 5.3 ML04 will run in a super slow state. It always happens on a weekday, always between 9:15AM - 10:00AM, and the only fix seems to be a reboot. I try to run commands during this state, but the response time is too long for me to wait since this is a Production machine. I run my commands from the console too, but it still takes too long. I can ping the server, but my Oracle apps can't connect.
I can't really tell if the system is 'Thrashing', but isn't that a tell-tale sign of a system that takes forever to respond?
The SGA size is 1.8GB.
I am using CIO mounted filesystems for Oracle. Since I'm using CIO, isn't tweaking the vmo values moot?
I did a test yesterday with our DBA and had him run several SQL statements that performed reads/writes against our Oracle database, and there were periods of high 'po' values in vmstat when they were all run at the same time.
In the /etc/security/limits file, here is what I have:
oracle:
fsize = -1
data = -1
rss = -1
stack = -1
cpu = -1
nofiles = -1
This pretty much allows the oracle user unlimited usage of resources, including memory. I was told by Oracle to do this. What do you guys think of putting a hard limit via rss_hard instead, so oracle can't usurp all of the memory?
Here are some statistics from the system during the window I've specified above.
# vmstat -I 2
kthr memory page faults cpu
-------- ----------- ------------------------ ------------ -----------
r b p avm fre fi fo pi po fr sr in sy cs us sy id wa
1 0 0 1269791 2305 271 25 150 0 183 5736 281 2081 616 22 3 37 39
0 1 0 1269826 3601 250 73 174 60 1128 32260 400 4178 920 17 5 37 41
2 1 0 1264021 8311 410 6 137 0 0 0 285 2690 729 17 2 39 41
0 1 0 1262058 9109 410 32 154 0 0 0 341 2006 737 14 3 41 43
1 1 0 1265977 4259 327 4 138 0 0 0 270 2932 594 22 3 37 38
0 1 0 1270081 2322 301 8 162 27 1546 34085 577 4924 1136 17 5 38 40
1 1 0 1270085 3823 310 22 164 44 1260 18072 442 3102 957 14 4 40 42
1 1 0 1264271 8398 485 26 145 0 0 0 285 1333 648 9 2 43 45
1 1 0 1262308 9653 206 7 148 0 0 0 256 1993 565 23 2 37 38
1 0 0 1266088 5085 271 30 104 0 0 0 259 2841 563 39 3 28 31
2 0 0 1264067 8189 561 11 78 31 1187 23368 352 6659 754 58 6 17 19
5 0 0 1272173 3331 801 9 72 574 2266 32042 455 9520 720 42 7 24 27
1 1 0 1266288 7477 807 6 62 0 0 0 280 2056 576 35 2 30 32
1 1 0 1266024 6482 589 5 37 0 0 0 582 6605 1196 51 4 22 23
2 0 0 1270404 2423 562 11 45 137 769 5268 648 10259 1271 52 7 20 21
0 1 0 1274318 2226 682 6 48 1543 2633 3702 488 4457 585 39 7 27 28
0 1 0 1269710 6955 60 17 40 129 129 151 546 9169 1115 28 6 54 12
# svmon -G
size inuse free pin virtual
memory 1261568 1245228 16340 132874 1188198
pg space 3637248 254426
work pers clnt
pin 132874 0 0
in use 1024221 0 221007
PageSize PoolSize inuse pgsp pin virtual
s 4 KB - 1194956 238202 100378 1122966
m 64 KB - 3142 1014 2031 4077
#
# lsps -a
Page Space Physical Volume Volume Group Size %Used Active Auto Type
paging00 hdisk3 oraclevg 8192MB 8 yes yes lv
hd6 hdisk1 rootvg 6016MB 7 yes yes lv
#
# vmo -a
cpu_scale_memp = 8
data_stagger_interval = 161
defps = 1
force_relalias_lite = 0
framesets = 2
htabscale = n/a
kernel_heap_psize = 4096
large_page_heap_size = 0
lgpg_regions = 0
lgpg_size = 0
low_ps_handling = 1
lru_file_repage = 0
lru_poll_interval = 10
lrubucket = 131072
maxclient% = 80
maxfree = 1088
maxperm = 968863
maxperm% = 80
maxpin = 1018607
maxpin% = 80
mbuf_heap_psize = 65536
memory_affinity = 1
memory_frames = 1261568
memplace_data = 2
memplace_mapped_file = 2
memplace_shm_anonymous = 2
memplace_shm_named = 2
memplace_stack = 2
memplace_text = 2
memplace_unmapped_file = 2
mempools = 1
minfree = 960
minperm = 242215
minperm% = 20
nokilluid = 0
npskill = 36608
npsrpgmax = 292864
npsrpgmin = 219648
npsscrubmax = 292864
npsscrubmin = 219648
npswarn = 146432
num_spec_dataseg = 0
numpsblks = 4685824
page_steal_method = 0
pagecoloring = n/a
pinnable_frames = 1144691
npsrpgmin = 219648
npsscrubmax = 292864
npsscrubmin = 219648
npswarn = 146432
num_spec_dataseg = 0
numpsblks = 4685824
page_steal_method = 0
pagecoloring = n/a
pinnable_frames = 1144691
pta_balance_threshold = n/a
relalias_percentage = 0
rpgclean = 0
rpgcontrol = 2
scrub = 0
scrubclean = 0
soft_min_lgpgs_vmpool = 0
spec_dataseg_int = 512
strict_maxclient = 1
strict_maxperm = 0
v_pinshm = 0
vm_modlist_threshold = -1
vmm_fork_policy = 1
vmm_mpsize_support = 1
I just think we have too little physical memory for our workload, but I would love to hear what you guys think and how your AIX/Oracle environments are tuned.
Thanks!