|
377 | 377 | " <tr>\n",
|
378 | 378 | " <th>0</th>\n",
|
379 | 379 | " <td>L-BFGS-B</td>\n",
|
380 |
| - " <td>0.028414</td>\n", |
381 |
| - " <td>0.767214</td>\n", |
382 |
| - " <td>0.630689</td>\n", |
| 380 | + " <td>0.017857</td>\n", |
| 381 | + " <td>0.720272</td>\n", |
| 382 | + " <td>0.572021</td>\n", |
383 | 383 | " </tr>\n",
|
384 | 384 | " <tr>\n",
|
385 | 385 | " <th>1</th>\n",
|
386 | 386 | " <td>TNC</td>\n",
|
387 |
| - " <td>0.023116</td>\n", |
388 |
| - " <td>0.808149</td>\n", |
389 |
| - " <td>0.664340</td>\n", |
| 387 | + " <td>0.022109</td>\n", |
| 388 | + " <td>0.755240</td>\n", |
| 389 | + " <td>0.599792</td>\n", |
390 | 390 | " </tr>\n",
|
391 | 391 | " <tr>\n",
|
392 | 392 | " <th>2</th>\n",
|
393 | 393 | " <td>SLSQP</td>\n",
|
394 |
| - " <td>0.003840</td>\n", |
395 |
| - " <td>0.767214</td>\n", |
396 |
| - " <td>0.630689</td>\n", |
| 394 | + " <td>0.002948</td>\n", |
| 395 | + " <td>0.720272</td>\n", |
| 396 | + " <td>0.572021</td>\n", |
397 | 397 | " </tr>\n",
|
398 | 398 | " <tr>\n",
|
399 | 399 | " <th>3</th>\n",
|
400 | 400 | " <td>trf</td>\n",
|
401 |
| - " <td>0.232000</td>\n", |
402 |
| - " <td>0.771106</td>\n", |
403 |
| - " <td>0.633888</td>\n", |
| 401 | + " <td>0.235711</td>\n", |
| 402 | + " <td>0.721336</td>\n", |
| 403 | + " <td>0.572866</td>\n", |
404 | 404 | " </tr>\n",
|
405 | 405 | " </tbody>\n",
|
406 | 406 | "</table>\n",
|
407 | 407 | "</div>"
|
408 | 408 | ],
|
409 | 409 | "text/plain": [
|
410 | 410 | " Method Time Elapsed (min) Error / Baseline Error Error / Ground Truth\n",
|
411 |
| - "0 L-BFGS-B 0.028414 0.767214 0.630689\n", |
412 |
| - "1 TNC 0.023116 0.808149 0.664340\n", |
413 |
| - "2 SLSQP 0.003840 0.767214 0.630689\n", |
414 |
| - "3 trf 0.232000 0.771106 0.633888" |
| 411 | + "0 L-BFGS-B 0.017857 0.720272 0.572021\n", |
| 412 | + "1 TNC 0.022109 0.755240 0.599792\n", |
| 413 | + "2 SLSQP 0.002948 0.720272 0.572021\n", |
| 414 | + "3 trf 0.235711 0.721336 0.572866" |
415 | 415 | ]
|
416 | 416 | },
|
417 | 417 | "execution_count": 13,
|
|
739 | 739 | " <th>fraction</th>\n",
|
740 | 740 | " <th>filename</th>\n",
|
741 | 741 | " <th>ms1_int_sum</th>\n",
|
742 |
| - " <th>int_sum_dn</th>\n", |
| 742 | + " <th>ms1_int_sum_dn</th>\n", |
743 | 743 | " </tr>\n",
|
744 | 744 | " </thead>\n",
|
745 | 745 | " <tbody>\n",
|
|
796 | 796 | "</div>"
|
797 | 797 | ],
|
798 | 798 | "text/plain": [
|
799 |
| - " precursor fraction filename ms1_int_sum int_sum_dn\n", |
800 |
| - "0 Prec_1 1 A 0.6 0.887676\n", |
801 |
| - "1 Prec_1 1 A 0.8 1.183568\n", |
802 |
| - "2 Prec_1 2 A 0.6 0.926809\n", |
803 |
| - "3 Prec_1 1 B 1.2 0.891552\n", |
804 |
| - "4 Prec_1 1 B 1.6 1.188736\n", |
805 |
| - "5 Prec_1 2 B 1.2 0.921659" |
| 799 | + " precursor fraction filename ms1_int_sum ms1_int_sum_dn\n", |
| 800 | + "0 Prec_1 1 A 0.6 0.887676\n", |
| 801 | + "1 Prec_1 1 A 0.8 1.183568\n", |
| 802 | + "2 Prec_1 2 A 0.6 0.926809\n", |
| 803 | + "3 Prec_1 1 B 1.2 0.891552\n", |
| 804 | + "4 Prec_1 1 B 1.6 1.188736\n", |
| 805 | + "5 Prec_1 2 B 1.2 0.921659" |
806 | 806 | ]
|
807 | 807 | },
|
808 | 808 | "metadata": {},
|
|
1217 | 1217 | " \"\"\"\n",
|
1218 | 1218 | " grouped, protein = chunk\n",
|
1219 | 1219 | "\n",
|
1220 |
| - " column_combinations = List()\n", |
1221 |
| - " [column_combinations.append(_) for _ in combinations(range(len(files)), 2)]\n", |
1222 |
| - " \n", |
| 1220 | + " files_ = grouped.index.get_level_values('filename').unique().tolist()\n", |
| 1221 | + "\n", |
1223 | 1222 | " selection = grouped.unstack().T.copy()\n",
|
1224 | 1223 | " selection = selection.replace(0, np.nan)\n",
|
1225 | 1224 | "\n",
|
1226 |
| - " if not selection.shape[1] == len(files):\n", |
1227 |
| - " selection[[_ for _ in files if _ not in selection.columns]] = np.nan\n", |
| 1225 | + " if len(files_) > 1: \n", |
| 1226 | + " column_combinations = List()\n", |
| 1227 | + " [column_combinations.append(_) for _ in combinations(range(len(files_)), 2)]\n", |
1228 | 1228 | "\n",
|
1229 |
| - " selection = selection[files]\n", |
1230 |
| - "\n", |
1231 |
| - " ratios = get_protein_ratios(selection.values, column_combinations, minimum_ratios)\n", |
1232 |
| - " \n", |
1233 |
| - " retry = False\n", |
1234 |
| - " try:\n", |
1235 |
| - " solution, success = solve_profile(ratios, 'L-BFGS-B')\n", |
1236 |
| - " except ValueError:\n", |
1237 |
| - " retry = True\n", |
1238 |
| - "\n", |
1239 |
| - " if retry or not success:\n", |
1240 |
| - " logging.info('Normalization with L-BFGS-B failed. Trying Powell')\n", |
1241 |
| - " solution, success = solve_profile(ratios, 'Powell')\n", |
| 1229 | + " ratios = get_protein_ratios(selection.values, column_combinations, minimum_ratios)\n", |
1242 | 1230 | " \n",
|
1243 |
| - " pre_lfq = selection.sum().values\n", |
| 1231 | + " retry = False\n", |
| 1232 | + " try:\n", |
| 1233 | + " solution, success = solve_profile(ratios, 'L-BFGS-B')\n", |
| 1234 | + " except ValueError:\n", |
| 1235 | + " retry = True\n", |
| 1236 | + "\n", |
| 1237 | + " if retry or not success:\n", |
| 1238 | + " logging.info('Normalization with L-BFGS-B failed. Trying Powell')\n", |
| 1239 | + " solution, success = solve_profile(ratios, 'Powell')\n", |
| 1240 | + "\n", |
| 1241 | + " pre_lfq = selection.sum().values\n", |
1244 | 1242 | "\n",
|
1245 |
| - " if not success or np.sum(~np.isnan(ratios)) == 0: # or np.sum(solution) == len(pre_lfq):\n", |
1246 |
| - " profile = np.zeros_like(pre_lfq)\n", |
1247 |
| - " if np.sum(np.isnan(ratios)) != ratios.size:\n", |
1248 |
| - " logging.info(f'Solver failed for protein {protein} despite available ratios:\\n {ratios}')\n", |
| 1243 | + " if not success or np.sum(~np.isnan(ratios)) == 0: # or np.sum(solution) == len(pre_lfq):\n", |
| 1244 | + " profile = np.zeros(len(files_))\n", |
| 1245 | + " if np.sum(np.isnan(ratios)) != ratios.size:\n", |
| 1246 | + " logging.info(f'Solver failed for protein {protein} despite available ratios:\\n {ratios}')\n", |
1249 | 1247 | "\n",
|
| 1248 | + " else:\n", |
| 1249 | + " invalid = ((np.nansum(ratios, axis=1) == 0) & (np.nansum(ratios, axis=0) == 0))\n", |
| 1250 | + " peptide_int_sum = pre_lfq.sum() * solution \n", |
| 1251 | + " peptide_int_sum[invalid] = 0\n", |
| 1252 | + " profile = peptide_int_sum * pre_lfq.sum() / np.sum(peptide_int_sum) #Normalize intensity again\n", |
1250 | 1253 | " else:\n",
|
1251 |
| - " invalid = ((np.nansum(ratios, axis=1) == 0) & (np.nansum(ratios, axis=0) == 0))\n", |
1252 |
| - " peptide_int_sum = pre_lfq.sum() * solution \n", |
1253 |
| - " peptide_int_sum[invalid] = 0\n", |
1254 |
| - " profile = peptide_int_sum * pre_lfq.sum() / np.sum(peptide_int_sum) #Normalize inensity again\n", |
1255 |
| - " \n", |
1256 |
| - " \n", |
| 1254 | + " pre_lfq = profile = selection.values[0]\n", |
| 1255 | + "\n", |
| 1256 | + "\n", |
| 1257 | + " #Map profile and pre_lfq onto the full `files` list (0 for files absent from this protein)\n", |
| 1258 | + " profile_dict = dict(zip(files_, profile))\n", |
| 1259 | + " pre_dict = dict(zip(files_, pre_lfq))\n", |
| 1260 | + " profile = np.array([0 if file not in profile_dict else profile_dict[file] for file in files])\n", |
| 1261 | + " pre_lfq = np.array([0 if file not in pre_dict else pre_dict[file] for file in files])\n", |
| 1262 | + "\n", |
1257 | 1263 | " return profile, pre_lfq, protein \n"
|
1258 | 1264 | ]
|
1259 | 1265 | },
|
|
1338 | 1344 | ],
|
1339 | 1345 | "text/plain": [
|
1340 | 1346 | " precursor filename protein_group ms1_int_sum\n",
|
1341 |
| - "0 Prec_1 A X 0.6\n", |
1342 |
| - "1 Prec_1 B X 0.8\n", |
1343 |
| - "2 Prec_1 C X 1.0\n", |
1344 |
| - "3 Prec_2 A X 0.6\n", |
1345 |
| - "4 Prec_2 B X 1.2\n", |
1346 |
| - "5 Prec_2 C X 1.4" |
| 1347 | + "0 Prec_1 A X 0.6\n", |
| 1348 | + "1 Prec_1 B X 0.8\n", |
| 1349 | + "2 Prec_1 C X 1.0\n", |
| 1350 | + "3 Prec_2 A X 0.6\n", |
| 1351 | + "4 Prec_2 B X 1.2\n", |
| 1352 | + "5 Prec_2 C X 1.4" |
1347 | 1353 | ]
|
1348 | 1354 | },
|
1349 | 1355 | "metadata": {},
|
|
1473 | 1479 | "\n",
|
1474 | 1480 | " #Take the best precursor for protein quantification. .max()\n",
|
1475 | 1481 | " grouped = df[[field, 'filename','precursor','protein_group']].groupby(['protein_group','filename','precursor']).max()\n",
|
1476 |
| - "\n", |
1477 |
| - " column_combinations = List()\n", |
1478 |
| - " [column_combinations.append(_) for _ in combinations(range(len(files)), 2)]\n", |
1479 | 1482 | " \n",
|
1480 | 1483 | " files = df['filename'].unique().tolist()\n",
|
1481 | 1484 | " files.sort()\n",
|
|
0 commit comments