Skip to content

Commit 6fff219

Browse files
authored
Merge pull request #97 from filecoin-project/fix/less-write-lk-contention
Less write lock contention, better read timeout handling
2 parents 787a96a + fc81a98 commit 6fff219

File tree

5 files changed

+323
-32
lines changed

5 files changed

+323
-32
lines changed

client.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@ import (
1111
"net/http"
1212
"net/url"
1313
"reflect"
14+
"runtime/pprof"
1415
"sync/atomic"
1516
"time"
1617

18+
"github.com/google/uuid"
1719
"github.com/gorilla/websocket"
1820
logging "github.com/ipfs/go-log/v2"
1921
"go.opencensus.io/trace"
@@ -238,7 +240,7 @@ func websocketClient(ctx context.Context, addr string, namespace string, outs []
238240
hnd = h
239241
}
240242

241-
go (&wsConn{
243+
wconn := &wsConn{
242244
conn: conn,
243245
connFactory: connFactory,
244246
reconnectBackoff: config.reconnectBackoff,
@@ -248,7 +250,14 @@ func websocketClient(ctx context.Context, addr string, namespace string, outs []
248250
requests: requests,
249251
stop: stop,
250252
exiting: exiting,
251-
}).handleWsConn(ctx)
253+
}
254+
255+
go func() {
256+
lbl := pprof.Labels("jrpc-mode", "wsclient", "jrpc-remote", addr, "jrpc-local", conn.LocalAddr().String(), "jrpc-uuid", uuid.New().String())
257+
pprof.Do(ctx, lbl, func(ctx context.Context) {
258+
wconn.handleWsConn(ctx)
259+
})
260+
}()
252261

253262
if err := c.provide(outs); err != nil {
254263
return nil, err

handler.go

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,11 +446,48 @@ func (s *handler) handle(ctx context.Context, req request, w func(func(io.Writer
446446
log.Errorw("error and res returned", "request", req, "r.err", resp.Error, "res", res)
447447
}
448448

449-
w(func(w io.Writer) {
449+
withLazyWriter(w, func(w io.Writer) {
450450
if err := json.NewEncoder(w).Encode(resp); err != nil {
451451
log.Error(err)
452452
stats.Record(ctx, metrics.RPCResponseError.M(1))
453453
return
454454
}
455455
})
456456
}
457+
458+
// withLazyWriter runs cb with an io.Writer that postpones calling
// withWriterFunc until cb performs its first Write.
//
// json.Encoder.Encode marshals the whole value before it writes anything, so
// acquiring the real writer only on the first write avoids holding it while a
// very large response is still being marshalled.
//
// Once acquired, the real writer stays in use until cb returns; closing done
// is what lets the callback handed to withWriterFunc return.
func withLazyWriter(withWriterFunc func(func(io.Writer)), cb func(io.Writer)) {
	lw := &lazyWriter{
		withWriterFunc: withWriterFunc,

		done: make(chan struct{}),
	}

	// Deferred so the acquired writer is released even if cb panics.
	defer close(lw.done)
	cb(lw)
}

// lazyWriter is an io.Writer that obtains its underlying writer from
// withWriterFunc on the first call to Write. Its fields are accessed without
// locking, so Write must not be called concurrently.
type lazyWriter struct {
	withWriterFunc func(func(io.Writer))

	w    io.Writer     // underlying writer; nil until the first Write acquires it
	err  error         // sticky failure recorded when no writer could be acquired
	done chan struct{} // closed by withLazyWriter when the user callback has returned
}

// Write acquires the underlying writer on first use and forwards p to it.
//
// Acquisition happens on a separate goroutine because the callback passed to
// withWriterFunc has to stay blocked (keeping the writer valid) until
// withLazyWriter closes done, while Write itself must return to its caller.
//
// If withWriterFunc returns without ever invoking its callback (for example
// because it could not obtain a writer), Write returns io.ErrClosedPipe
// instead of blocking forever; later calls return the same error without
// trying to acquire again.
func (lw *lazyWriter) Write(p []byte) (n int, err error) {
	if lw.err != nil {
		return 0, lw.err
	}

	if lw.w == nil {
		acquired := make(chan struct{})
		returned := make(chan struct{})

		go func() {
			defer close(returned)
			lw.withWriterFunc(func(w io.Writer) {
				lw.w = w
				close(acquired)
				<-lw.done
			})
		}()

		// On success the callback is parked on lw.done, so returned cannot
		// be closed yet and only acquired is ready. returned alone becomes
		// ready when the callback was never invoked.
		select {
		case <-acquired:
		case <-returned:
			lw.err = io.ErrClosedPipe
			return 0, lw.err
		}
	}

	return lw.w.Write(p)
}

rpc_test.go

Lines changed: 155 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ import (
1010
"net"
1111
"net/http"
1212
"net/http/httptest"
13+
"os"
1314
"reflect"
1415
"strconv"
1516
"strings"
1617
"sync"
18+
"sync/atomic"
1719
"testing"
1820
"time"
1921

@@ -33,7 +35,7 @@ func init() {
3335
}
3436

3537
type SimpleServerHandler struct {
36-
n int
38+
n int32
3739
}
3840

3941
type TestType struct {
@@ -57,14 +59,14 @@ func (h *SimpleServerHandler) Add(in int) error {
5759
return errors.New("test")
5860
}
5961

60-
h.n += in
62+
atomic.AddInt32(&h.n, int32(in))
6163

6264
return nil
6365
}
6466

6567
func (h *SimpleServerHandler) AddGet(in int) int {
66-
h.n += in
67-
return h.n
68+
atomic.AddInt32(&h.n, int32(in))
69+
return int(h.n)
6870
}
6971

7072
func (h *SimpleServerHandler) StringMatch(t TestType, i2 int64) (out TestOut, err error) {
@@ -88,7 +90,7 @@ func TestRawRequests(t *testing.T) {
8890
testServ := httptest.NewServer(rpcServer)
8991
defer testServ.Close()
9092

91-
tc := func(req, resp string, n int) func(t *testing.T) {
93+
tc := func(req, resp string, n int32) func(t *testing.T) {
9294
return func(t *testing.T) {
9395
rpcHandler.n = 0
9496

@@ -225,7 +227,7 @@ func TestRPC(t *testing.T) {
225227
// Add(int) error
226228

227229
require.NoError(t, client.Add(2))
228-
require.Equal(t, 2, serverHandler.n)
230+
require.Equal(t, 2, int(serverHandler.n))
229231

230232
err = client.Add(-3546)
231233
require.EqualError(t, err, "test")
@@ -234,7 +236,7 @@ func TestRPC(t *testing.T) {
234236

235237
n := client.AddGet(3)
236238
require.Equal(t, 5, n)
237-
require.Equal(t, 5, serverHandler.n)
239+
require.Equal(t, 5, int(serverHandler.n))
238240

239241
// StringMatch
240242

@@ -268,7 +270,7 @@ func TestRPC(t *testing.T) {
268270

269271
// this one should actually work
270272
noret.Add(4)
271-
require.Equal(t, 9, serverHandler.n)
273+
require.Equal(t, 9, int(serverHandler.n))
272274
closer()
273275

274276
var noparam struct {
@@ -343,7 +345,7 @@ func TestRPCHttpClient(t *testing.T) {
343345
// Add(int) error
344346

345347
require.NoError(t, client.Add(2))
346-
require.Equal(t, 2, serverHandler.n)
348+
require.Equal(t, 2, int(serverHandler.n))
347349

348350
err = client.Add(-3546)
349351
require.EqualError(t, err, "test")
@@ -352,7 +354,7 @@ func TestRPCHttpClient(t *testing.T) {
352354

353355
n := client.AddGet(3)
354356
require.Equal(t, 5, n)
355-
require.Equal(t, 5, serverHandler.n)
357+
require.Equal(t, 5, int(serverHandler.n))
356358

357359
// StringMatch
358360

@@ -379,7 +381,7 @@ func TestRPCHttpClient(t *testing.T) {
379381

380382
// this one should actually work
381383
noret.Add(4)
382-
require.Equal(t, 9, serverHandler.n)
384+
require.Equal(t, 9, int(serverHandler.n))
383385
closer()
384386

385387
var noparam struct {
@@ -429,6 +431,41 @@ func TestRPCHttpClient(t *testing.T) {
429431
closer()
430432
}
431433

434+
func TestParallelRPC(t *testing.T) {
435+
// setup server
436+
437+
serverHandler := &SimpleServerHandler{}
438+
439+
rpcServer := NewServer()
440+
rpcServer.Register("SimpleServerHandler", serverHandler)
441+
442+
// httptest stuff
443+
testServ := httptest.NewServer(rpcServer)
444+
defer testServ.Close()
445+
// setup client
446+
447+
var client struct {
448+
Add func(int) error
449+
}
450+
closer, err := NewClient(context.Background(), "ws://"+testServ.Listener.Addr().String(), "SimpleServerHandler", &client, nil)
451+
require.NoError(t, err)
452+
defer closer()
453+
454+
var wg sync.WaitGroup
455+
for i := 0; i < 100; i++ {
456+
wg.Add(1)
457+
go func() {
458+
defer wg.Done()
459+
for j := 0; j < 100; j++ {
460+
require.NoError(t, client.Add(2))
461+
}
462+
}()
463+
}
464+
wg.Wait()
465+
466+
require.Equal(t, 20000, int(serverHandler.n))
467+
}
468+
432469
type CtxHandler struct {
433470
lk sync.Mutex
434471

@@ -1414,3 +1451,110 @@ func TestReverseCallAliased(t *testing.T) {
14141451

14151452
closer()
14161453
}
1454+
1455+
// BigCallTestServerHandler is the RPC handler exercised by TestBigResult. It
// has no state of its own.
type BigCallTestServerHandler struct{}

// RecRes is a recursive result value: an integer plus a list of nested RecRes.
type RecRes struct {
	I int
	R []RecRes
}

// Do builds and returns a deliberately huge result: a root with I == 123
// holding 15000 children, each of which holds 15000 leaf entries. It prints
// "sending result" just before returning and never returns an error.
func (h *BigCallTestServerHandler) Do() (RecRes, error) {
	const width = 15000

	// The final lengths are known, so size the slices once instead of
	// letting append regrow them repeatedly.
	res := RecRes{I: 123, R: make([]RecRes, 0, width)}

	for i := 0; i < width; i++ {
		ires := RecRes{I: i, R: make([]RecRes, 0, width)}

		for j := 0; j < width; j++ {
			ires.R = append(ires.R, RecRes{I: j})
		}

		res.R = append(res.R, ires)
	}

	fmt.Println("sending result")

	return res, nil
}

// Ch returns a channel on which consecutive integers, starting at 0, are sent
// roughly once per second. When ctx is done the channel is closed; this also
// holds while a send is pending, so the producing goroutine cannot stay
// blocked on a receiver that has gone away. The returned error is always nil.
func (h *BigCallTestServerHandler) Ch(ctx context.Context) (<-chan int, error) {
	out := make(chan int)

	go func() {
		defer close(out)

		for i := 0; ; i++ {
			// Wait one second between values unless cancelled first.
			select {
			case <-ctx.Done():
				fmt.Println("closing")
				return
			case <-time.After(time.Second):
			}

			fmt.Println("sending")

			// Observe cancellation while sending as well as while waiting.
			select {
			case out <- i:
			case <-ctx.Done():
				fmt.Println("closing")
				return
			}
		}
	}()

	return out, nil
}
1507+
1508+
// TestBigResult tests that the connection doesn't die when sending a large result,
1509+
// and that requests which happen while a large result is being sent don't fail.
1510+
func TestBigResult(t *testing.T) {
1511+
if os.Getenv("I_HAVE_A_LOT_OF_MEMORY_AND_TIME") != "1" {
1512+
// needs ~40GB of memory and ~4 minutes to run
1513+
t.Skip("skipping test due to requiced resources, set I_HAVE_A_LOT_OF_MEMORY_AND_TIME=1 to run")
1514+
}
1515+
1516+
// setup server
1517+
1518+
serverHandler := &BigCallTestServerHandler{}
1519+
1520+
rpcServer := NewServer()
1521+
rpcServer.Register("SimpleServerHandler", serverHandler)
1522+
1523+
// httptest stuff
1524+
testServ := httptest.NewServer(rpcServer)
1525+
defer testServ.Close()
1526+
// setup client
1527+
1528+
var client struct {
1529+
Do func() (RecRes, error)
1530+
Ch func(ctx context.Context) (<-chan int, error)
1531+
}
1532+
closer, err := NewClient(context.Background(), "ws://"+testServ.Listener.Addr().String(), "SimpleServerHandler", &client, nil)
1533+
require.NoError(t, err)
1534+
defer closer()
1535+
1536+
chctx, cancel := context.WithCancel(context.Background())
1537+
defer cancel()
1538+
1539+
// client.Ch will generate some requests, which will require websocket locks,
1540+
// and before fixes in #97 would cause deadlocks / timeouts when combined with
1541+
// the large result processing from client.Do
1542+
ch, err := client.Ch(chctx)
1543+
require.NoError(t, err)
1544+
1545+
prevN := <-ch
1546+
1547+
go func() {
1548+
for n := range ch {
1549+
if n != prevN+1 {
1550+
panic("bad order")
1551+
}
1552+
prevN = n
1553+
}
1554+
}()
1555+
1556+
_, err = client.Do()
1557+
require.NoError(t, err)
1558+
1559+
fmt.Println("done")
1560+
}

server.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ import (
55
"encoding/json"
66
"io"
77
"net/http"
8+
"runtime/pprof"
89
"strings"
910
"time"
1011

12+
"github.com/google/uuid"
1113
"github.com/gorilla/websocket"
1214
)
1315

@@ -77,7 +79,10 @@ func (s *RPCServer) handleWS(ctx context.Context, w http.ResponseWriter, r *http
7779
}
7880
}
7981

80-
wc.handleWsConn(ctx)
82+
lbl := pprof.Labels("jrpc-mode", "wsserver", "jrpc-remote", r.RemoteAddr, "jrpc-uuid", uuid.New().String())
83+
pprof.Do(ctx, lbl, func(ctx context.Context) {
84+
wc.handleWsConn(ctx)
85+
})
8186

8287
if err := c.Close(); err != nil {
8388
log.Errorw("closing websocket connection", "error", err)

0 commit comments

Comments
 (0)