Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

misc improvements #1

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions Code/42nd-at-threadmill.asd
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
(asdf:defsystem :42nd-at-threadmill
:depends-on (:atomics :bordeaux-threads)
:depends-on (:atomics :bordeaux-threads :sb-simd)
:serial t
:components ((:file "package")
(:module "x86-64"
:components
((:module "VOPs"
:components
((:file "define-boring-vop")
(:file "avx2-broadcastb"
:if-feature (:not :threadmill-avx2))
(:file "sse2-vops"
:if-feature (:not :threadmill-avx2))
(:file "avx2-vops"
:if-feature :threadmill-avx2)
(:file "sse2-vops" :if-feature (:not :threadmill-avx2))
(:file "bsf")
(:file "cas-bytes")))
(:file "sse-metadata"
Expand Down
2 changes: 0 additions & 2 deletions Code/hash-table.lisp
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
(in-package :threadmill)

(defconstant +empty+ '+empty+)
(defconstant +copied+ '+copied+)
;; SBCL seems to have difficulties proving that we won't go out of
;; bounds. Fair enough, we have a funny way of addressing the table,
;; but we don't want to do bounds checks.
Expand Down
4 changes: 3 additions & 1 deletion Code/package.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@
#:gethash #:remhash #:clrhash #:maphash)
(:export #:make-hash-table #:hash-table #:hash-table-p
#:hash-table-test #:hash-table-count #:hash-table-size
#:gethash #:remhash #:clrhash #:maphash #:modhash))
#:gethash #:remhash #:clrhash #:maphash #:modhash)
(:local-nicknames (#:avx2 #:sb-simd-avx2)
(#:sse2 #:sb-simd-sse2)))
3 changes: 3 additions & 0 deletions Code/storage-vector.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
;;; 4. How many elements we are going to copy (used when resizing)
;;; Then the rest of the table just contains a key, then a value, and so on.

(defconstant +empty+ '+empty+)
(defconstant +copied+ '+copied+)

;;; We define macros so that we can SETF and CAS the position, without
;;; having to write setters and (unportable) CAS-ers.
(defconstant +words-before-values+ 8)
Expand Down
15 changes: 0 additions & 15 deletions Code/x86-64/VOPs/avx2-broadcastb.lisp

This file was deleted.

76 changes: 0 additions & 76 deletions Code/x86-64/VOPs/avx2-vops.lisp

This file was deleted.

62 changes: 14 additions & 48 deletions Code/x86-64/VOPs/sse2-vops.lisp
Original file line number Diff line number Diff line change
@@ -1,55 +1,21 @@
(in-package :sb-vm)

(defknown threadmill::%sse2-load
((simple-array (unsigned-byte 8) (*)) (unsigned-byte 64))
(simd-pack integer)
(foldable flushable)
:overwrite-fndb-silently t)

(threadmill::define-boring-vop threadmill::%sse2-load
((vector simple-array-unsigned-byte-8 :scs (descriptor-reg))
(index unsigned-num :scs (unsigned-reg)))
(bytes simd-pack-int :scs (int-sse-reg))
;; SBCL vectors are aligned to 16 bytes.
(inst movdqa bytes
(ea (- (* vector-data-offset n-word-bytes)
other-pointer-lowtag)
vector index 1)))
;; We still define our own byte-broadcast VOP here because the byte broadcast provided by sb-simd is currently unsatisfactory.
(defknown threadmill::%sse2-broadcast-byte
((unsigned-byte 8))
(simd-pack integer)
(foldable movable flushable)
:overwrite-fndb-silently t)

(threadmill::define-boring-vop threadmill::%sse2-broadcast-byte
((byte unsigned-num :scs (unsigned-reg)))
(bytes simd-pack-int :scs (int-sse-reg))
(inst movd bytes byte) ; xxxxxxxxxxxxxxxB
(inst punpcklbw bytes bytes) ; xxxxxxxxxxxxxxBB
(inst punpcklbw bytes bytes) ; xxxxxxxxxxxxBBBB
(inst pshufd bytes bytes #4r0000)) ; BBBBBBBBBBBBBBBB

(defknown threadmill::%sse2-movemask
((simd-pack integer))
(unsigned-byte 16)
(foldable movable flushable)
:overwrite-fndb-silently t)

(threadmill::define-boring-vop threadmill::%sse2-movemask
((bytes simd-pack-int :scs (int-sse-reg)))
(mask unsigned-num :scs (unsigned-reg))
(inst pmovmskb mask bytes))

(defknown threadmill::%sse2=
((simd-pack integer) (simd-pack integer))
(simd-pack integer)
(simd-pack (unsigned-byte 8))
(foldable movable flushable)
:overwrite-fndb-silently t)

(threadmill::define-boring-vop threadmill::%sse2=
((a simd-pack-int :scs (int-sse-reg) :target result)
(b simd-pack-int :scs (int-sse-reg)))
(result simd-pack-int :scs (int-sse-reg))
(unless (location= a result)
(inst movdqa result a))
(inst pcmpeqb result b))
;; VOP implementing THREADMILL::%SSE2-BROADCAST-BYTE: copy the low byte of
;; BYTE into all 16 byte lanes of the result register.
;;
;; Only SSE2 instructions are used.  A MOVD / PXOR / PSHUFB sequence is one
;; instruction shorter, but PSHUFB belongs to SSSE3, so it would raise an
;; illegal-instruction fault on x86-64 CPUs that implement SSE2 without
;; SSSE3.  The unpack/shuffle sequence below needs no scratch register.
(sb-c:define-vop (threadmill::%sse2-broadcast-byte)
  (:translate threadmill::%sse2-broadcast-byte)
  (:policy :fast-safe)
  (:args (byte :scs (unsigned-reg)))
  (:arg-types unsigned-num)
  (:results (bytes :scs (int-sse-reg)))
  (:result-types simd-pack-ub8)
  (:generator 0
    (inst movd bytes byte)               ; xxxxxxxxxxxxxxxB
    (inst punpcklbw bytes bytes)         ; xxxxxxxxxxxxxxBB
    (inst punpcklbw bytes bytes)         ; xxxxxxxxxxxxBBBB
    (inst pshufd bytes bytes #4r0000)))  ; BBBBBBBBBBBBBBBB
8 changes: 4 additions & 4 deletions Code/x86-64/avx2-metadata.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@
(defun bytes (byte group)
"Return matches for a byte in a metadata group."
(declare ((unsigned-byte 8) byte))
(%avx2-movemask
(%avx2= (%avx2-broadcast/256 byte) group)))
(avx2:u8.32-movemask
(avx2:u8.32= group (avx2:u8.32 byte))))

(defun writable (group)
"Return matches for metadata bytes we can put new mappings in."
;; movemask tests the high bit of each byte, and we want to test the
;; high bit, so we have nothing else to do. Magic!
(%avx2-movemask group))
(avx2:u8.32-movemask group))

;; Combine two match sets M1 and M2 into one: the result is their bitwise
;; inclusive OR, so a bit is set if it is set in either argument.
(defun match-union (m1 m2)
(logior m1 m2))
Expand Down Expand Up @@ -77,7 +77,7 @@ Note that N has a length of an element."
(vector-index position)
(optimize (speed 3) (safety 0)))
;; Why won't SSE:AREF-PI work?
(%avx2-load vector position))
(avx2:u8.32-aref vector position))

;; Number of whole groups in METADATA: its length divided by
;; +METADATA-ENTRIES-PER-GROUP+, rounded down.  FLOOR also returns the
;; remainder (entries that do not fill a whole group) as a second value.
(defun metadata-groups (metadata)
(floor (length metadata) +metadata-entries-per-group+))
Expand Down
20 changes: 7 additions & 13 deletions Code/x86-64/sse-metadata.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,17 @@
(declare ((unsigned-byte 8) h2))
(logand #x7f h2))

(if (= 1 avx2-supported)
(defun bytes (byte group)
"Return matches for a byte in a metadata group."
(declare ((unsigned-byte 8) byte))
(%sse2-movemask
(%sse2= (%avx2-broadcast byte) group)))
(defun bytes (byte group)
"Return matches for a byte in a metadata group."
(declare ((unsigned-byte 8) byte))
(%sse2-movemask
(%sse2= (%sse2-broadcast-byte byte) group))))
;; Broadcast BYTE with %SSE2-BROADCAST-BYTE, compare the result against
;; GROUP with SSE2:U8.16=, and collapse that comparison into an integer
;; with SSE2:U8.16-MOVEMASK.
;; NOTE(review): presumably bit I of the result is set when metadata byte I
;; of GROUP equals BYTE -- confirm against the sb-simd documentation.
(defun bytes (byte group)
"Return matches for a byte in a metadata group."
(declare ((unsigned-byte 8) byte))
(sse2:u8.16-movemask
(sse2:u8.16= group (%sse2-broadcast-byte byte))))

(defun writable (group)
"Return matches for metadata bytes we can put new mappings in."
;; movemask tests the high bit of each byte, and we want to test the
;; high bit, so we have nothing else to do. Magic!
(%sse2-movemask group))
(sse2:u8.16-movemask group))

;; Combine two match sets M1 and M2 into one: the result is their bitwise
;; inclusive OR, so a bit is set if it is set in either argument.
(defun match-union (m1 m2)
(logior m1 m2))
Expand Down Expand Up @@ -82,7 +76,7 @@ Note that N has a length of an element."
(declare (metadata-vector vector)
(vector-index position)
(optimize (speed 3) (safety 0)))
(%sse2-load vector position))
(sse2:u8.16-aref vector position))

;; Number of whole groups in METADATA: its length divided by
;; +METADATA-ENTRIES-PER-GROUP+, rounded down.  FLOOR also returns the
;; remainder (entries that do not fill a whole group) as a second value.
(defun metadata-groups (metadata)
(floor (length metadata) +metadata-entries-per-group+))
Expand Down