|
85 | 85 | "merged_model.save_pretrained(\"./models/llama_32_8b_merged\")\n",
|
86 | 86 | "tokenizer.save_pretrained(\"./models/llama_32_8b_merged\")"
|
87 | 87 | ]
|
| 88 | + }, |
| 89 | + { |
| 90 | + "cell_type": "code", |
| 91 | + "execution_count": 3, |
| 92 | + "metadata": {}, |
| 93 | + "outputs": [ |
| 94 | + { |
| 95 | + "data": { |
| 96 | + "application/vnd.jupyter.widget-view+json": { |
| 97 | + "model_id": "444d71f492da4615b0d665a2af28e469", |
| 98 | + "version_major": 2, |
| 99 | + "version_minor": 0 |
| 100 | + }, |
| 101 | + "text/plain": [ |
| 102 | + "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]" |
| 103 | + ] |
| 104 | + }, |
| 105 | + "metadata": {}, |
| 106 | + "output_type": "display_data" |
| 107 | + }, |
| 108 | + { |
| 109 | + "name": "stderr", |
| 110 | + "output_type": "stream", |
| 111 | + "text": [ |
| 112 | + "Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at unsloth/Meta-Llama-3.1-8B and are newly initialized: ['score.weight']\n", |
| 113 | + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" |
| 114 | + ] |
| 115 | + }, |
| 116 | + { |
| 117 | + "ename": "RuntimeError", |
| 118 | + "evalue": "Error(s) in loading state_dict for LlamaForSequenceClassification:\n\tsize mismatch for score.modules_to_save.default.weight: copying a param with shape torch.Size([1, 4096]) from checkpoint, the shape in current model is torch.Size([2, 4096]).", |
| 119 | + "output_type": "error", |
| 120 | + "traceback": [ |
| 121 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 122 | + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
| 123 | + "Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForSequenceClassification, AutoTokenizer\n\u001b[0;32m----> 3\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForSequenceClassification\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./models/stories_train_model_v3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mauto\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./models/stories_train_model_v3\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 8\u001b[0m model \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mmerge_and_unload()\n", |
| 124 | + "File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 563\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 570\u001b[0m )\n", |
| 125 | + "File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:4276\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 4273\u001b[0m model\u001b[38;5;241m.\u001b[39mhf_quantizer \u001b[38;5;241m=\u001b[39m hf_quantizer\n\u001b[1;32m 4275\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _adapter_model_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 4276\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_adapter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4277\u001b[0m \u001b[43m \u001b[49m\u001b[43m_adapter_model_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4278\u001b[0m \u001b[43m \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4279\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4280\u001b[0m \u001b[43m \u001b[49m\u001b[43madapter_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4281\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4283\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_loading_info:\n\u001b[1;32m 4284\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m loading_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", |
| 126 | + "File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/transformers/integrations/peft.py:233\u001b[0m, in \u001b[0;36mPeftAdapterMixin.load_adapter\u001b[0;34m(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index, peft_config, adapter_state_dict, low_cpu_mem_usage, adapter_kwargs)\u001b[0m\n\u001b[1;32m 230\u001b[0m processed_adapter_state_dict[new_key] \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 232\u001b[0m \u001b[38;5;66;03m# Load state dict\u001b[39;00m\n\u001b[0;32m--> 233\u001b[0m incompatible_keys \u001b[38;5;241m=\u001b[39m \u001b[43mset_peft_model_state_dict\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprocessed_adapter_state_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpeft_load_kwargs\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m incompatible_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 238\u001b[0m \u001b[38;5;66;03m# check only for unexpected keys\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(incompatible_keys, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munexpected_keys\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(incompatible_keys\u001b[38;5;241m.\u001b[39munexpected_keys) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", |
| 127 | + "File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/peft/utils/save_and_load.py:464\u001b[0m, in \u001b[0;36mset_peft_model_state_dict\u001b[0;34m(model, peft_model_state_dict, adapter_name, ignore_mismatched_sizes, low_cpu_mem_usage)\u001b[0m\n\u001b[1;32m 462\u001b[0m module\u001b[38;5;241m.\u001b[39m_move_adapter_to_device_of_base_layer(adapter_name)\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 464\u001b[0m load_result \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_state_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpeft_model_state_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m config\u001b[38;5;241m.\u001b[39mis_prompt_learning:\n\u001b[1;32m 467\u001b[0m model\u001b[38;5;241m.\u001b[39mprompt_encoder[adapter_name]\u001b[38;5;241m.\u001b[39membedding\u001b[38;5;241m.\u001b[39mload_state_dict(\n\u001b[1;32m 468\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m\"\u001b[39m: peft_model_state_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompt_embeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m]}, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 469\u001b[0m )\n", |
| 128 | + "File \u001b[0;32m/workspace/best-hn/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:2215\u001b[0m, in \u001b[0;36mModule.load_state_dict\u001b[0;34m(self, state_dict, strict, assign)\u001b[0m\n\u001b[1;32m 2210\u001b[0m error_msgs\u001b[38;5;241m.\u001b[39minsert(\n\u001b[1;32m 2211\u001b[0m \u001b[38;5;241m0\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMissing key(s) in state_dict: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[1;32m 2212\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m missing_keys)))\n\u001b[1;32m 2214\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(error_msgs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 2215\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError(s) in loading state_dict for \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[1;32m 2216\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(error_msgs)))\n\u001b[1;32m 2217\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _IncompatibleKeys(missing_keys, unexpected_keys)\n", |
| 129 | + "\u001b[0;31mRuntimeError\u001b[0m: Error(s) in loading state_dict for LlamaForSequenceClassification:\n\tsize mismatch for score.modules_to_save.default.weight: copying a param with shape torch.Size([1, 4096]) from checkpoint, the shape in current model is torch.Size([2, 4096])." |
| 130 | + ] |
| 131 | + } |
| 132 | + ], |
| 133 | + "source": [ |
| 134 | + "from dotenv import load_dotenv\n", |
| 135 | + "\n", |
| 136 | + "load_dotenv()\n", |
| 137 | + "\n", |
| 138 | + "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", |
| 139 | + "\n", |
| 140 | + "model = AutoModelForSequenceClassification.from_pretrained(\n", |
| 141 | + " \"unsloth/Meta-Llama-3.1-8B\", device_map=\"auto\", num_labels=1\n", |
| 142 | + ")\n", |
| 143 | + "\n", |
| 144 | + "model = AutoModelForSequenceClassification.from_pretrained(\n", |
| 145 | + " \"./models/stories_train_model_v3\", device_map=\"auto\", num_labels=1\n", |
| 146 | + ")\n", |
| 147 | + "tokenizer = AutoTokenizer.from_pretrained(\"./models/stories_train_model_v3\")\n", |
| 148 | + "\n", |
| 149 | + "model = model.merge_and_unload()\n", |
| 150 | + "\n", |
| 151 | + "from huggingface_hub import HfApi\n", |
| 152 | + "\n", |
| 153 | + "# Login to Hugging Face (make sure you have your token set in the environment)\n", |
| 154 | + "api = HfApi()\n", |
| 155 | + "\n", |
| 156 | + "# Create private repository and push model\n", |
| 157 | + "model.push_to_hub(\"OpenPipe/lat-pretrained-rm-v1\", private=True, use_auth_token=True)\n", |
| 158 | + "\n", |
| 159 | + "# Push tokenizer\n", |
| 160 | + "tokenizer.push_to_hub(\n", |
| 161 | + " \"OpenPipe/lat-pretrained-rm-v1\", private=True, use_auth_token=True\n", |
| 162 | + ")\n" |
| 163 | + ] |
88 | 164 | }
|
89 | 165 | ],
|
90 | 166 | "metadata": {
|
|
0 commit comments