Skip to content

Commit cef1873

Browse files
committed
more work
1 parent 39d434d commit cef1873

9 files changed

+687
-129
lines changed

inference.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,16 @@ class MandT:
1313
tokenizer: AutoTokenizer
1414

1515

16+
ModelOrPath = Union[MandT, str]
17+
18+
19+
def load_model(model_or_path: ModelOrPath) -> MandT:
20+
if isinstance(model_or_path, str):
21+
return load_peft_model(model_or_path, merge=True)
22+
else:
23+
return model_or_path
24+
25+
1626
def load_peft_model(model_path: str, merge: bool = False) -> MandT:
1727
model = AutoModelForSequenceClassification.from_pretrained(
1828
model_path, num_labels=1, device_map="auto", torch_dtype=torch.bfloat16
@@ -35,10 +45,7 @@ def run_inference_transformers(
3545
model_or_path: Union[MandT, str],
3646
batch_size: int = 4,
3747
) -> list[float]:
38-
if isinstance(model_or_path, str):
39-
mandt = load_peft_model(model_or_path, merge=True)
40-
else:
41-
mandt = model_or_path
48+
mandt = load_model(model_or_path)
4249

4350
model = mandt.model
4451
tokenizer = mandt.tokenizer

merge-model.ipynb

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,82 @@
8585
"merged_model.save_pretrained(\"./models/llama_32_8b_merged\")\n",
8686
"tokenizer.save_pretrained(\"./models/llama_32_8b_merged\")"
8787
]
88+
},
89+
{
90+
"cell_type": "code",
91+
"execution_count": 3,
92+
"metadata": {},
93+
"outputs": [
94+
{
95+
"data": {
96+
"application/vnd.jupyter.widget-view+json": {
97+
"model_id": "444d71f492da4615b0d665a2af28e469",
98+
"version_major": 2,
99+
"version_minor": 0
100+
},
101+
"text/plain": [
102+
"Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
103+
]
104+
},
105+
"metadata": {},
106+
"output_type": "display_data"
107+
},
108+
{
109+
"name": "stderr",
110+
"output_type": "stream",
111+
"text": [
112+
"Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at unsloth/Meta-Llama-3.1-8B and are newly initialized: ['score.weight']\n",
113+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
114+
]
115+
},
116+
{
117+
"ename": "RuntimeError",
118+
"evalue": "Error(s) in loading state_dict for LlamaForSequenceClassification:\n\tsize mismatch for score.modules_to_save.default.weight: copying a param with shape torch.Size([1, 4096]) from checkpoint, the shape in current model is torch.Size([2, 4096]).",
119+
"output_type": "error",
120+
"traceback": [
121+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
122+
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
123+
"Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForSequenceClassification, AutoTokenizer\n\u001b[0;32m----> 3\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForSequenceClassification\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./models/stories_train_model_v3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mauto\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./models/stories_train_model_v3\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 8\u001b[0m model \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mmerge_and_unload()\n",
124+
"File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 563\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 570\u001b[0m )\n",
125+
"File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:4276\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 4273\u001b[0m model\u001b[38;5;241m.\u001b[39mhf_quantizer \u001b[38;5;241m=\u001b[39m hf_quantizer\n\u001b[1;32m 4275\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _adapter_model_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 4276\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_adapter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4277\u001b[0m \u001b[43m \u001b[49m\u001b[43m_adapter_model_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4278\u001b[0m \u001b[43m \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4279\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4280\u001b[0m \u001b[43m \u001b[49m\u001b[43madapter_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4281\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4283\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_loading_info:\n\u001b[1;32m 4284\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m loading_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
126+
"File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/transformers/integrations/peft.py:233\u001b[0m, in \u001b[0;36mPeftAdapterMixin.load_adapter\u001b[0;34m(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index, peft_config, adapter_state_dict, low_cpu_mem_usage, adapter_kwargs)\u001b[0m\n\u001b[1;32m 230\u001b[0m processed_adapter_state_dict[new_key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 232\u001b[0m \u001b[38;5;66;03m# Load state dict\u001b[39;00m\n\u001b[0;32m--> 233\u001b[0m incompatible_keys \u001b[38;5;241m=\u001b[39m \u001b[43mset_peft_model_state_dict\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprocessed_adapter_state_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpeft_load_kwargs\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m incompatible_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 238\u001b[0m \u001b[38;5;66;03m# check only for unexpected keys\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(incompatible_keys, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munexpected_keys\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(incompatible_keys\u001b[38;5;241m.\u001b[39munexpected_keys) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
127+
"File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/peft/utils/save_and_load.py:464\u001b[0m, in \u001b[0;36mset_peft_model_state_dict\u001b[0;34m(model, peft_model_state_dict, adapter_name, ignore_mismatched_sizes, low_cpu_mem_usage)\u001b[0m\n\u001b[1;32m 462\u001b[0m module\u001b[38;5;241m.\u001b[39m_move_adapter_to_device_of_base_layer(adapter_name)\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 464\u001b[0m load_result \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_state_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpeft_model_state_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m config\u001b[38;5;241m.\u001b[39mis_prompt_learning:\n\u001b[1;32m 467\u001b[0m model\u001b[38;5;241m.\u001b[39mprompt_encoder[adapter_name]\u001b[38;5;241m.\u001b[39membedding\u001b[38;5;241m.\u001b[39mload_state_dict(\n\u001b[1;32m 468\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m\"\u001b[39m: peft_model_state_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompt_embeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m]}, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 469\u001b[0m )\n",
128+
"File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:2215\u001b[0m, in \u001b[0;36mModule.load_state_dict\u001b[0;34m(self, state_dict, strict, assign)\u001b[0m\n\u001b[1;32m 2210\u001b[0m error_msgs\u001b[38;5;241m.\u001b[39minsert(\n\u001b[1;32m 2211\u001b[0m \u001b[38;5;241m0\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMissing key(s) in state_dict: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[1;32m 2212\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m missing_keys)))\n\u001b[1;32m 2214\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(error_msgs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 2215\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError(s) in loading state_dict for \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[1;32m 2216\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(error_msgs)))\n\u001b[1;32m 2217\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _IncompatibleKeys(missing_keys, unexpected_keys)\n",
129+
"\u001b[0;31mRuntimeError\u001b[0m: Error(s) in loading state_dict for LlamaForSequenceClassification:\n\tsize mismatch for score.modules_to_save.default.weight: copying a param with shape torch.Size([1, 4096]) from checkpoint, the shape in current model is torch.Size([2, 4096])."
130+
]
131+
}
132+
],
133+
"source": [
134+
"from dotenv import load_dotenv\n",
135+
"\n",
136+
"load_dotenv()\n",
137+
"\n",
138+
"from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
139+
"\n",
140+
"model = AutoModelForSequenceClassification.from_pretrained(\n",
141+
" \"unsloth/Meta-Llama-3.1-8B\", device_map=\"auto\", num_labels=1\n",
142+
")\n",
143+
"\n",
144+
"model = AutoModelForSequenceClassification.from_pretrained(\n",
145+
" \"./models/stories_train_model_v3\", device_map=\"auto\", num_labels=1\n",
146+
")\n",
147+
"tokenizer = AutoTokenizer.from_pretrained(\"./models/stories_train_model_v3\")\n",
148+
"\n",
149+
"model = model.merge_and_unload()\n",
150+
"\n",
151+
"from huggingface_hub import HfApi\n",
152+
"\n",
153+
"# Login to Hugging Face (make sure you have your token set in the environment)\n",
154+
"api = HfApi()\n",
155+
"\n",
156+
"# Create private repository and push model\n",
157+
"model.push_to_hub(\"OpenPipe/lat-pretrained-rm-v1\", private=True, use_auth_token=True)\n",
158+
"\n",
159+
"# Push tokenizer\n",
160+
"tokenizer.push_to_hub(\n",
161+
" \"OpenPipe/lat-pretrained-rm-v1\", private=True, use_auth_token=True\n",
162+
")\n"
163+
]
88164
}
89165
],
90166
"metadata": {

0 commit comments

Comments
 (0)