3 files changed: +5 -2 lines changed
server/text_generation_server (3 files changed: +5 -2 lines changed)
@@ -149,7 +149,7 @@ def __init__(
149149 ):
150150 super ().__init__ ()
151151 self .max_past = (
152- config .sliding_window if config .sliding_window is not None else 0
152+ config .sliding_window if config .sliding_window is not None else - 1
153153 )
154154 self .num_heads = config .num_attention_heads
155155 self .hidden_size = config .hidden_size
Original file line number Diff line number Diff line change @@ -204,7 +204,7 @@ def __init__(
204204 ):
205205 super ().__init__ ()
206206 self .max_past = (
207- config .sliding_window if config .sliding_window is not None else 0
207+ config .sliding_window if config .sliding_window is not None else - 1
208208 )
209209 self .num_heads = config .num_attention_heads
210210 self .hidden_size = config .hidden_size
Original file line number Diff line number Diff line change @@ -72,6 +72,9 @@ def attention(
7272 softmax_scale ,
7373 window_size_left = - 1 ,
7474):
75+ if window_size_left <= 0 and window_size_left != - 1 :
76+ raise ValueError ("`window_size_left` must be > 0 or -1" )
77+
7578 if HAS_FLASH_ATTN_V2_CUDA :
7679 return flash_attn_2_cuda .varlen_fwd (
7780 q ,
You can’t perform that action at this time.
0 commit comments