Skip to content

Commit e08d316

Browse files
rconnaughtyswelch
authored and committed
prov/cxi: Fix bug in constrained LE test cases in test.sh and test_sw.sh
This commit resolves a bug in certain CXI criterion test cases where the le_pools[] max_alloc value was being decreased for all LE pool entries. This resulted in unintended LE append failures, NMIs, and inbound wait timeouts related to the CXI eth driver. Now, the test only lowers the max_alloc value for LE pools associated with the default CXI service, which is used during the criterion tests. Additionally, instances of csrutil have been replaced with cxiutil. Signed-off-by: Roger Connaughty <[email protected]>
1 parent 66b1bbf commit e08d316

File tree

2 files changed

+34
-19
lines changed

2 files changed

+34
-19
lines changed

prov/cxi/test/test.sh

+10-8
Original file line numberDiff line numberDiff line change
@@ -74,18 +74,20 @@ basic_test=("./cxitest --verbose --tap=cxitest.tap -j 1")
7474

7575
swget_test=(
7676
"FI_CXI_RGET_TC=BULK_DATA ./cxitest --verbose --filter=\"@(tagged|msg)/*\" --tap=cxitest-swget.tap -j1"
77-
"csrutil store csr C_LPE_CFG_GET_CTRL get_en=0 > /dev/null"
78-
"csrutil store csr C_LPE_CFG_GET_CTRL get_en=1 > /dev/null")
77+
"cxiutil store csr C_LPE_CFG_GET_CTRL get_en=0 > /dev/null"
78+
"cxiutil store csr C_LPE_CFG_GET_CTRL get_en=1 > /dev/null")
7979

8080
swget_unaligned_test=(
8181
"FI_CXI_RDZV_THRESHOLD=2036 ./cxitest --verbose --filter=\"@(tagged|msg)/*\" --tap=cxitest-swget-unaligned.tap -j1"
82-
"csrutil store csr C_LPE_CFG_GET_CTRL get_en=0 > /dev/null"
83-
"csrutil store csr C_LPE_CFG_GET_CTRL get_en=1 > /dev/null")
82+
"cxiutil store csr C_LPE_CFG_GET_CTRL get_en=0 > /dev/null"
83+
"cxiutil store csr C_LPE_CFG_GET_CTRL get_en=1 > /dev/null")
8484

8585
constrained_le_test=(
8686
"FI_CXI_DEFAULT_CQ_SIZE=16384 ./cxitest --verbose --filter=\"@(tagged|msg)/fc*\" --tap=cxitest-constrained-le.tap -j1"
87-
"MAX_ALLOC=\$(csrutil dump csr le_pools[63] | grep max_alloc | awk '{print \$3}'); echo \"Saving MAX_ALLOC=\$MAX_ALLOC\"; csrutil store csr le_pools[] max_alloc=10 > /dev/null"
88-
"echo \"Restoring MAX_ALLOC=\$MAX_ALLOC\"; csrutil store csr le_pools[] max_alloc=\$MAX_ALLOC > /dev/null")
87+
"MAX_ALLOC=\$(cxiutil dump csr le_pools[63] | grep max_alloc | awk '{print \$3}'); echo \"Saving MAX_ALLOC=\$MAX_ALLOC\";
88+
cxiutil store csr le_pools[0] max_alloc=10 > /dev/null; cxiutil store csr le_pools[16] max_alloc=10 > /dev/null;
89+
cxiutil store csr le_pools[32] max_alloc=10 > /dev/null; cxiutil store csr le_pools[48] max_alloc=10 > /dev/null"
90+
"echo \"Restoring MAX_ALLOC=\$MAX_ALLOC\"; cxiutil store csr le_pools[] max_alloc=\$MAX_ALLOC > /dev/null")
8991

9092
hw_matching_rendezvous_test=(
9193
"FI_CXI_DEVICE_NAME=\"cxi1,cxi0\" FI_CXI_RDZV_GET_MIN=0 FI_CXI_RDZV_THRESHOLD=2048 ./cxitest --verbose -j 1 --filter=\"tagged_directed/*\" --tap=cxitest-hw-rdzv-tag-matching.tap")
@@ -119,8 +121,8 @@ zero_eager_size_test=(
119121

120122
alt_read_rendezvous_test=(
121123
"FI_CXI_RDZV_PROTO=\"alt_read\" ./cxitest --filter=\"tagged/*rdzv\" -j 1 -f --verbose --tap=cxitest-alt-read-rdzv.tap"
122-
"csrutil store csr C_LPE_CFG_GET_CTRL get_en=0 > /dev/null"
123-
"csrutil store csr C_LPE_CFG_GET_CTRL get_en=1 > /dev/null")
124+
"cxiutil store csr C_LPE_CFG_GET_CTRL get_en=0 > /dev/null"
125+
"cxiutil store csr C_LPE_CFG_GET_CTRL get_en=1 > /dev/null")
124126

125127
mr_mode_no_compat_test=(
126128
"FI_CXI_COMPAT=0 ./cxitest -j 1 --filter=\"getinfo_infos/*\" -f --verbose --tap=cxitest-mr-mode-no-compat.tap")

prov/cxi/test/test_sw.sh

+24-11
Original file line numberDiff line numberDiff line change
@@ -22,48 +22,61 @@ export FI_LOG_PROV=cxi
2222
#fi
2323

2424
# Run tests with constrained LE count - Using Flow Control recovery
25-
MAX_ALLOC=`csrutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
26-
csrutil store csr le_pools[] max_alloc=10 > /dev/null
25+
MAX_ALLOC=`cxiutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
26+
cxiutil store csr le_pools[0] max_alloc=10 > /dev/null
27+
cxiutil store csr le_pools[16] max_alloc=10 > /dev/null
28+
cxiutil store csr le_pools[32] max_alloc=10 > /dev/null
29+
cxiutil store csr le_pools[48] max_alloc=10 > /dev/null
2730
echo "running;FI_CXI_RX_MATCH_MODE=hardware ./cxitest --verbose --filter=\"tagged/fc*\" --tap=cxitest-fc.tap -j1 > $TEST_OUTPUT 2>&1"
2831
FI_CXI_RX_MATCH_MODE=hardware ./cxitest --verbose --filter="tagged/fc*" --tap=cxitest-fc.tap -j1 > $TEST_OUTPUT 2>&1
2932
cxitest_exit_status=$?
30-
csrutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
33+
cxiutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
3134
if [[ $cxitest_exit_status -ne 0 ]]; then
3235
echo "cxitest return non-zero exit code. Possible failures in test teardown"
3336
exit 1
3437
fi
3538

3639
# Run tests with constrained LE count - Using hybrid operation instead
3740
# of flow control recovery
38-
MAX_ALLOC=`csrutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
39-
csrutil store csr le_pools[] max_alloc=10 > /dev/null
41+
MAX_ALLOC=`cxiutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
42+
cxiutil store csr le_pools[0] max_alloc=10 > /dev/null
43+
cxiutil store csr le_pools[16] max_alloc=10 > /dev/null
44+
cxiutil store csr le_pools[32] max_alloc=10 > /dev/null
45+
cxiutil store csr le_pools[48] max_alloc=10 > /dev/null
4046
echo "running;FI_CXI_RX_MATCH_MODE=hybrid FI_CXI_RDZV_GET_MIN=0 ./cxitest --verbose --filter=\"tagged/fc*\" --tap=cxitest-sw-transition.tap -j1 >> $TEST_OUTPUT 2>&1"
4147
FI_CXI_RX_MATCH_MODE=hybrid FI_CXI_RDZV_GET_MIN=0 ./cxitest --verbose --filter="tagged/fc*" --tap=cxitest-sw-transition.tap -j1 >> $TEST_OUTPUT 2>&1
4248
cxitest_exit_status=$?
43-
csrutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
49+
cxiutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
4450
if [[ $cxitest_exit_status -ne 0 ]]; then
4551
echo "cxitest return non-zero exit code. Possible failures in test teardown"
4652
exit 1
4753
fi
4854

4955
# Run HW to SW hybrid test with constrained LE count and forcing both
5056
# eager and rendezvous processing
51-
MAX_ALLOC=`csrutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
52-
csrutil store csr le_pools[] max_alloc=60 > /dev/null
57+
MAX_ALLOC=`cxiutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
58+
cxiutil store csr le_pools[0] max_alloc=60 > /dev/null
59+
cxiutil store csr le_pools[16] max_alloc=60 > /dev/null
60+
cxiutil store csr le_pools[32] max_alloc=60 > /dev/null
61+
cxiutil store csr le_pools[48] max_alloc=60 > /dev/null
5362
echo "running;FI_CXI_RX_MATCH_MODE=hybrid FI_CXI_RDZV_GET_MIN=0 FI_CXI_RDZV_THRESHOLD=2048 ./cxitest --verbose --filter=\"tagged/hw2sw_*\" --tap=cxitest-hw2sw-transition.tap -j1 >> $TEST_OUTPUT 2>&1"
5463
FI_CXI_RX_MATCH_MODE=hybrid FI_CXI_RDZV_GET_MIN=0 FI_CXI_RDZV_THRESHOLD=2048 ./cxitest --verbose --filter="tagged/hw2sw_*" --tap=cxitest-hw2sw-transition.tap -j1 >> $TEST_OUTPUT 2>&1
55-
csrutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
64+
cxiutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
5665
if [[ $cxitest_exit_status -ne 0 ]]; then
5766
echo "cxitest return non-zero exit code. Possible failures in test teardown"
5867
exit 1
5968
fi
6069

6170
# Run HW to SW hybrid test with constrained LE count and forcing only eager processing
62-
MAX_ALLOC=`csrutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
71+
MAX_ALLOC=`cxiutil dump csr le_pools[63] |grep max_alloc |awk '{print $3}'`
72+
cxiutil store csr le_pools[0] max_alloc=60 > /dev/null
73+
cxiutil store csr le_pools[16] max_alloc=60 > /dev/null
74+
cxiutil store csr le_pools[32] max_alloc=60 > /dev/null
75+
cxiutil store csr le_pools[48] max_alloc=60 > /dev/null
6376
echo "running;FI_CXI_RX_MATCH_MODE=hybrid FI_CXI_RDZV_GET_MIN=0 FI_CXI_RDZV_THRESHOLD=16384 ./cxitest --verbose --filter=\"tagged/hw2sw_*\" --tap=cxitest-hw2sw-eager-transition.tap -j1 >> $TEST_OUTPUT 2>&1"
6477
FI_CXI_RX_MATCH_MODE=hybrid FI_CXI_RDZV_GET_MIN=0 FI_CXI_RDZV_THRESHOLD=16384 ./cxitest --verbose --filter="tagged/hw2sw_*" --tap=cxitest-hw2sw-transition.tap -j1 >> $TEST_OUTPUT 2>&1
6578
cxitest_exit_status=$?
66-
csrutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
79+
cxiutil store csr le_pools[] max_alloc=$MAX_ALLOC > /dev/null
6780
if [[ $cxitest_exit_status -ne 0 ]]; then
6881
echo "cxitest return non-zero exit code. Possible failures in test teardown"
6982
exit 1

0 commit comments

Comments
 (0)