Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
aef9ac7
Merge pull request #17 from hugolatendresse/fixed_aggregate
mhk197 Dec 12, 2024
ea439b8
try one hidden layer and see if outputs
Dec 12, 2024
391ccda
made slightly more sense than prev but changing back
Dec 12, 2024
4449e0b
num experts 1
Dec 12, 2024
28f719d
num experts 1
Dec 12, 2024
84df2b1
Merge remote-tracking branch 'origin/dev_mixtral' into matt-groupby-1210
hugolatendresse Dec 12, 2024
1315dd7
try fix
Dec 12, 2024
60d5237
Merge branch 'matt-groupby-1210' of https://github.com/hugolatendress…
Dec 12, 2024
0a09ff6
try without num_local_experts = 1
Dec 12, 2024
3dde8ed
try without num_local_experts = 1
Dec 12, 2024
e9acdf4
fixed
Dec 12, 2024
2bc3777
fixed
Dec 12, 2024
3c3c1db
try new experts
Dec 12, 2024
84a450b
debug groupby
Dec 12, 2024
31ad4e9
debug groupby
Dec 12, 2024
a4eb58b
trying this
Dec 12, 2024
cb6d1e9
trying this
Dec 12, 2024
77e1e5a
trying this
Dec 12, 2024
24ef878
pushed
Dec 12, 2024
e0a14f5
fixed arg counts magic number in aggregate
Dec 12, 2024
59e9d84
try 2 experts now
Dec 12, 2024
28a2f9b
failed with 2 experts, 2 tokens per expert. trying 2 experts, 1 token…
Dec 12, 2024
a693ae0
worked with n = 2, tok = 1. trying n = 3, tok = 2.
Dec 12, 2024
8788a02
worked with n = 2, tok = 1. trying n = 3, tok = 2.
Dec 12, 2024
7d5dc9e
maybe fixed cuda kernel?
Dec 12, 2024
c810f20
changed incr_decodig
Dec 13, 2024
b678b26
num experts = 1, tok = 1
Dec 13, 2024
38aa0c6
k =1, n =2
Dec 13, 2024
4a3000d
k =1, n =1
Dec 13, 2024
6e585a1
k =1, n =2
Dec 13, 2024
4eca725
n = 3, k = 1
Dec 13, 2024
2e8e24a
n = 4, k = 1
Dec 13, 2024
2513484
n = 5, k = 1
Dec 13, 2024
98a7f90
full model again
Dec 13, 2024
03c67c9
try 1 again
Dec 13, 2024
7b48a58
try 1 again
Dec 13, 2024
97a73b7
try 1 again
Dec 13, 2024
50e2883
try 1 again
Dec 13, 2024
26e874f
try 1 again
Dec 13, 2024
e4f87f4
conflicts
hugolatendresse Dec 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion inference/incr_decoding/incr_decoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ void FlexFlow::top_level_task(Task const *task,
bool do_sample = false;
float temperature = 0.0f;
float topp = 0.0f;
int max_requests_per_batch = 8;
int max_requests_per_batch = 1; // 8
int max_tokens_per_batch = 128;
int max_sequence_length = 256;

Expand Down
3 changes: 2 additions & 1 deletion inference/models/mixtral.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
bool use_full_precision) {

MixtralConfig mixtral_config(model_config_file_path);
mixtral_config.print();

if (ff.config.tensor_parallelism_degree > mixtral_config.num_attention_heads ||
mixtral_config.num_attention_heads % ff.config.tensor_parallelism_degree !=
Expand All @@ -45,6 +44,8 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
}

std::unordered_map<std::string, Layer *> weights_layers;
mixtral_config.num_local_experts = 5;
mixtral_config.num_experts_per_tok = 1;

Tensor input;
{
Expand Down
Binary file added prof_1.gz
Binary file not shown.
Binary file added prof_2.gz
Binary file not shown.
Binary file added prof_3.gz
Binary file not shown.
Binary file added prof_4.gz
Binary file not shown.
Binary file added prof_5.gz
Binary file not shown.
Binary file added prof_6.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions src/ops/aggregate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -353,13 +353,13 @@ OpMeta *Aggregate::init_task(Task const *task,

// Only needed to allocate memory in the kernel
AggregateMeta *m = new AggregateMeta(handle, agg, gpu_mem_allocator);
for (int i = 0; i < regions.size() - 1; i++) {
int num_inputs = agg->n + FIXED_ARG_CNT;
for (int i = 0; i < num_inputs; i++) {
m->input_type[i] = agg->inputs[i]->data_type;
}
m->output_type[0] = agg->outputs[0]->data_type;
std::strcpy(m->op_name, agg->name);

// TODO three instructions below are not in SigmoidSiluMulti::init_task
m->profiling = agg->profiling;
m->inference_debugging = agg->inference_debugging;
std::strcpy(m->op_name, agg->name);
Expand Down
2 changes: 1 addition & 1 deletion src/ops/group_by.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ __global__ void

// Get pred pointers, single thread per block
if (threadIdx.x == 0) {
int exp_tensor_rows = ceil(alpha * k / n * batch_size);
int exp_tensor_rows = alpha == 0.0f ? ceil(alpha * k / n * batch_size) : 128;
int expert_idx[MAX_N] = {0};
for (int i = 0; i < k * batch_size; i++) {
// Get pointer to chosen expert predictions
Expand Down