MannLabs
diff --git a/.bumpversion.cfg
+1-1 b/.bumpversion.cfg
+1-1
diff --git a/alphapept/__init__.py
+1-1 b/alphapept/__init__.py
+1-1
diff --git a/alphapept/__version__.py
+1-1 b/alphapept/__version__.py
+1-1
diff --git a/alphapept/interface.py
+5-3 b/alphapept/interface.py
+5-3
diff --git a/alphapept/quantification.py
+30-27 b/alphapept/quantification.py
+30-27
diff --git a/alphapept/utils.py
+6-2 b/alphapept/utils.py
+6-2
diff --git a/installer/one_click_windows/alphapept_innoinstaller.iss
+1-1 b/installer/one_click_windows/alphapept_innoinstaller.iss
+1-1
diff --git a/installer/one_click_windows/create_installer_windows.bat
+1-1 b/installer/one_click_windows/create_installer_windows.bat
+1-1
diff --git a/nbs/08_quantification.ipynb
+65-62 b/nbs/08_quantification.ipynb
+65-62
diff --git a/nbs/11_interface.ipynb
+5-3 b/nbs/11_interface.ipynb
+5-3
diff --git a/release/one_click_linux_gui/control
+1-1 b/release/one_click_linux_gui/control
+1-1
diff --git a/release/one_click_linux_gui/create_installer_linux.sh
+1-1 b/release/one_click_linux_gui/create_installer_linux.sh
+1-1
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.0
+current_version = 0.4.1
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
 
@@ -1,4 +1,4 @@
-__version__ = "0.4.0"
+__version__ = "0.4.1"
 
 __requirements__ = {
     "": "requirements/requirements.txt",
 
@@ -33,7 +33,7 @@
 AUTHOR_EMAIL = "[email protected]"
 COPYRIGHT = "Mann Labs"
 BRANCH = "master"
-VERSION_NO = "0.4.0"
+VERSION_NO = "0.4.1"
 MIN_PYTHON = "3.6"
 MAX_PYTHON = "4"
 AUDIENCE = "Developers"
 
@@ -1395,12 +1395,14 @@ def cli_database(settings_file):
 
 @click.command(
     "import",
-    help="Import and convert raw data from vendor to `.ms_data.hdf` file.",
-    short_help="Import and convert raw data from vendor to `.ms_data.hdf` file."
+    help="Import and convert raw data from vendor to `.ms_data.hdf` file with default settings.",
+    short_help="Import and convert raw data from vendor to `.ms_data.hdf` file with default settings."
 )
 @CLICK_SETTINGS_OPTION
 def cli_import(settings_file):
-    settings = alphapept.settings.load_settings(settings_file)
+    from .paths import DEFAULT_SETTINGS_PATH
+    settings = alphapept.settings.load_settings_as_template(DEFAULT_SETTINGS_PATH)
+    settings['experiment']['file_paths'] = [settings_file]
     import_raw_data(settings)
 
 
 
@@ -449,43 +449,49 @@ def protein_profile(files: list, minimum_ratios: int, chunk:tuple) -> (np.ndarra
     """
     grouped, protein = chunk
 
-    column_combinations = List()
-    [column_combinations.append(_) for _ in combinations(range(len(files)), 2)]
+    files_ = grouped.index.get_level_values('filename').unique().tolist()
 
     selection = grouped.unstack().T.copy()
     selection = selection.replace(0, np.nan)
 
-    if not selection.shape[1] == len(files):
-        selection[[_ for _ in files if _ not in selection.columns]] = np.nan
+    if len(files_) > 1:
+        column_combinations = List()
+        [column_combinations.append(_) for _ in combinations(range(len(files_)), 2)]
 
-    selection = selection[files]
+        ratios = get_protein_ratios(selection.values, column_combinations, minimum_ratios)
 
-    ratios = get_protein_ratios(selection.values, column_combinations, minimum_ratios)
-
-    retry = False
-    try:
-        solution, success = solve_profile(ratios, 'L-BFGS-B')
-    except ValueError:
-        retry = True
+        retry = False
+        try:
+            solution, success = solve_profile(ratios, 'L-BFGS-B')
+        except ValueError:
+            retry = True
 
-    if retry or not success:
-        logging.info('Normalization with L-BFGS-B failed. Trying Powell')
-        solution, success = solve_profile(ratios, 'Powell')
+        if retry or not success:
+            logging.info('Normalization with L-BFGS-B failed. Trying Powell')
+            solution, success = solve_profile(ratios, 'Powell')
 
-    pre_lfq = selection.sum().values
+        pre_lfq = selection.sum().values
 
-    if not success or np.sum(~np.isnan(ratios)) == 0: # or np.sum(solution) == len(pre_lfq):
-        profile = np.zeros_like(pre_lfq)
-        if np.sum(np.isnan(ratios)) != ratios.size:
-            logging.info(f'Solver failed for protein {protein} despite available ratios:\n {ratios}')
+        if not success or np.sum(~np.isnan(ratios)) == 0: # or np.sum(solution) == len(pre_lfq):
+            profile = np.zeros(len(files_))
+            if np.sum(np.isnan(ratios)) != ratios.size:
+                logging.info(f'Solver failed for protein {protein} despite available ratios:\n {ratios}')
 
+        else:
+            invalid = ((np.nansum(ratios, axis=1) == 0) & (np.nansum(ratios, axis=0) == 0))
+            peptide_int_sum = pre_lfq.sum() * solution
+            peptide_int_sum[invalid] = 0
+            profile = peptide_int_sum * pre_lfq.sum() / np.sum(peptide_int_sum) #Normalize intensity again
     else:
-        invalid = ((np.nansum(ratios, axis=1) == 0) & (np.nansum(ratios, axis=0) == 0))
-        peptide_int_sum = pre_lfq.sum() * solution
-        peptide_int_sum[invalid] = 0
-        profile = peptide_int_sum * pre_lfq.sum() / np.sum(peptide_int_sum) #Normalize inensity again
+        pre_lfq = profile = selection.values[0]
 
 
+    #Map profile and pre_lfq back onto the full file list; files without data for this protein get 0
+    profile_dict = dict(zip(files_, profile))
+    pre_dict = dict(zip(files_, pre_lfq))
+    profile = np.array([0 if file not in profile_dict else profile_dict[file] for file in files])
+    pre_lfq = np.array([0 if file not in pre_dict else pre_dict[file] for file in files])
+
     return profile, pre_lfq, protein
 
 
@@ -519,9 +525,6 @@ def protein_profile_parallel(df: pd.DataFrame, minimum_ratios: int, field: str,
     #Take the best precursor for protein quantification. .max()
     grouped = df[[field, 'filename','precursor','protein_group']].groupby(['protein_group','filename','precursor']).max()
 
-    column_combinations = List()
-    [column_combinations.append(_) for _ in combinations(range(len(files)), 2)]
-
     files = df['filename'].unique().tolist()
     files.sort()
 
 
@@ -296,14 +296,18 @@ def assemble_df(settings, field = 'protein_fdr', callback=None):
 
 def check_file(file):
     if not os.path.isfile(file):
-        raise FileNotFoundError(f"{file}")
+        base, ext = os.path.splitext(file)
+        if not os.path.isfile(base+'.ms_data.hdf'):
+            raise FileNotFoundError(f"{file}")
 
 def get_size_mb(file):
     return os.path.getsize(file)/(1024**2)
 
 def check_dir(dir):
     if not os.path.isdir(dir):
-        raise FileNotFoundError(f"{dir}")
+        base, ext = os.path.splitext(dir)
+        if not os.path.isfile(base+'.ms_data.hdf'):
+            raise FileNotFoundError(f"{dir}")
 
 def delete_file(filename):
     if os.path.isfile(filename):
 
@@ -2,7 +2,7 @@
 ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
 
 #define MyAppName "AlphaPept"
-#define MyAppVersion "0.4.0"
+#define MyAppVersion "0.4.1"
 #define MyAppPublisher "MannLabs"
 #define MyAppURL "https://github.com/MannLabs/alphapept"
 #define MyAppExeName "alphapept.exe"
 
@@ -12,7 +12,7 @@ call DEL /F/Q/S dist > NUL
 call RMDIR /Q/S build
 call RMDIR /Q/S dist
 call python setup.py sdist bdist_wheel
-call pip install dist/alphapept-0.4.0-py3-none-any.whl[stable,gui-stable]
+call pip install dist/alphapept-0.4.1-py3-none-any.whl[stable,gui-stable]
 call pip install pyinstaller==4.7
 call cd installer/one_click_windows
 call pyinstaller ../alphapept.spec -y
 
@@ -377,41 +377,41 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>L-BFGS-B</td>\n",
-       "      <td>0.028414</td>\n",
-       "      <td>0.767214</td>\n",
-       "      <td>0.630689</td>\n",
+       "      <td>0.017857</td>\n",
+       "      <td>0.720272</td>\n",
+       "      <td>0.572021</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>TNC</td>\n",
-       "      <td>0.023116</td>\n",
-       "      <td>0.808149</td>\n",
-       "      <td>0.664340</td>\n",
+       "      <td>0.022109</td>\n",
+       "      <td>0.755240</td>\n",
+       "      <td>0.599792</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>SLSQP</td>\n",
-       "      <td>0.003840</td>\n",
-       "      <td>0.767214</td>\n",
-       "      <td>0.630689</td>\n",
+       "      <td>0.002948</td>\n",
+       "      <td>0.720272</td>\n",
+       "      <td>0.572021</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>trf</td>\n",
-       "      <td>0.232000</td>\n",
-       "      <td>0.771106</td>\n",
-       "      <td>0.633888</td>\n",
+       "      <td>0.235711</td>\n",
+       "      <td>0.721336</td>\n",
+       "      <td>0.572866</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "     Method  Time Elapsed (min)  Error / Baseline Error  Error / Ground Truth\n",
-       "0  L-BFGS-B            0.028414                0.767214              0.630689\n",
-       "1       TNC            0.023116                0.808149              0.664340\n",
-       "2     SLSQP            0.003840                0.767214              0.630689\n",
-       "3       trf            0.232000                0.771106              0.633888"
+       "0  L-BFGS-B            0.017857                0.720272              0.572021\n",
+       "1       TNC            0.022109                0.755240              0.599792\n",
+       "2     SLSQP            0.002948                0.720272              0.572021\n",
+       "3       trf            0.235711                0.721336              0.572866"
       ]
      },
      "execution_count": 13,
@@ -739,7 +739,7 @@
        "      <th>fraction</th>\n",
        "      <th>filename</th>\n",
        "      <th>ms1_int_sum</th>\n",
-       "      <th>int_sum_dn</th>\n",
+       "      <th>ms1_int_sum_dn</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -796,13 +796,13 @@
        "</div>"
       ],
       "text/plain": [
-       "  precursor  fraction filename  ms1_int_sum  int_sum_dn\n",
-       "0    Prec_1         1        A      0.6    0.887676\n",
-       "1    Prec_1         1        A      0.8    1.183568\n",
-       "2    Prec_1         2        A      0.6    0.926809\n",
-       "3    Prec_1         1        B      1.2    0.891552\n",
-       "4    Prec_1         1        B      1.6    1.188736\n",
-       "5    Prec_1         2        B      1.2    0.921659"
+       "  precursor  fraction filename  ms1_int_sum  ms1_int_sum_dn\n",
+       "0    Prec_1         1        A          0.6        0.887676\n",
+       "1    Prec_1         1        A          0.8        1.183568\n",
+       "2    Prec_1         2        A          0.6        0.926809\n",
+       "3    Prec_1         1        B          1.2        0.891552\n",
+       "4    Prec_1         1        B          1.6        1.188736\n",
+       "5    Prec_1         2        B          1.2        0.921659"
       ]
      },
      "metadata": {},
@@ -1217,43 +1217,49 @@
     "    \"\"\"\n",
     "    grouped, protein = chunk\n",
     "\n",
-    "    column_combinations = List()\n",
-    "    [column_combinations.append(_) for _ in combinations(range(len(files)), 2)]\n",
-    "    \n",
+    "    files_ = grouped.index.get_level_values('filename').unique().tolist()\n",
+    "\n",
     "    selection = grouped.unstack().T.copy()\n",
     "    selection = selection.replace(0, np.nan)\n",
     "\n",
-    "    if not selection.shape[1] == len(files):\n",
-    "        selection[[_ for _ in files if _ not in selection.columns]] = np.nan\n",
+    "    if len(files_) > 1:    \n",
+    "        column_combinations = List()\n",
+    "        [column_combinations.append(_) for _ in combinations(range(len(files_)), 2)]\n",
     "\n",
-    "    selection = selection[files]\n",
-    "\n",
-    "    ratios = get_protein_ratios(selection.values, column_combinations, minimum_ratios)\n",
-    "    \n",
-    "    retry = False\n",
-    "    try:\n",
-    "        solution, success = solve_profile(ratios, 'L-BFGS-B')\n",
-    "    except ValueError:\n",
-    "        retry = True\n",
-    "\n",
-    "    if retry or not success:\n",
-    "        logging.info('Normalization with L-BFGS-B failed. Trying Powell')\n",
-    "        solution, success = solve_profile(ratios, 'Powell')\n",
+    "        ratios = get_protein_ratios(selection.values, column_combinations, minimum_ratios)\n",
     "        \n",
-    "    pre_lfq = selection.sum().values\n",
+    "        retry = False\n",
+    "        try:\n",
+    "            solution, success = solve_profile(ratios, 'L-BFGS-B')\n",
+    "        except ValueError:\n",
+    "            retry = True\n",
+    "\n",
+    "        if retry or not success:\n",
+    "            logging.info('Normalization with L-BFGS-B failed. Trying Powell')\n",
+    "            solution, success = solve_profile(ratios, 'Powell')\n",
+    "\n",
+    "        pre_lfq = selection.sum().values\n",
     "\n",
-    "    if not success or np.sum(~np.isnan(ratios)) == 0: # or np.sum(solution) == len(pre_lfq):\n",
-    "        profile = np.zeros_like(pre_lfq)\n",
-    "        if np.sum(np.isnan(ratios)) != ratios.size:\n",
-    "            logging.info(f'Solver failed for protein {protein} despite available ratios:\\n {ratios}')\n",
+    "        if not success or np.sum(~np.isnan(ratios)) == 0: # or np.sum(solution) == len(pre_lfq):\n",
+    "            profile = np.zeros(len(files_))\n",
+    "            if np.sum(np.isnan(ratios)) != ratios.size:\n",
+    "                logging.info(f'Solver failed for protein {protein} despite available ratios:\\n {ratios}')\n",
     "\n",
+    "        else:\n",
+    "            invalid = ((np.nansum(ratios, axis=1) == 0) & (np.nansum(ratios, axis=0) == 0))\n",
+    "            peptide_int_sum = pre_lfq.sum() * solution \n",
+    "            peptide_int_sum[invalid] = 0\n",
+    "            profile = peptide_int_sum * pre_lfq.sum() / np.sum(peptide_int_sum) #Normalize intensity again\n",
     "    else:\n",
-    "        invalid = ((np.nansum(ratios, axis=1) == 0) & (np.nansum(ratios, axis=0) == 0))\n",
-    "        peptide_int_sum = pre_lfq.sum() * solution \n",
-    "        peptide_int_sum[invalid] = 0\n",
-    "        profile = peptide_int_sum * pre_lfq.sum() / np.sum(peptide_int_sum) #Normalize inensity again\n",
-    "        \n",
-    "    \n",
+    "        pre_lfq = profile = selection.values[0]\n",
+    "\n",
+    "\n",
+    "    #Map profile and pre_lfq back onto the full file list; files without data for this protein get 0\n",
+    "    profile_dict = dict(zip(files_, profile))\n",
+    "    pre_dict = dict(zip(files_, pre_lfq))\n",
+    "    profile = np.array([0 if file not in profile_dict else profile_dict[file] for file in files])\n",
+    "    pre_lfq = np.array([0 if file not in pre_dict else pre_dict[file] for file in files])\n",
+    "\n",
     "    return profile, pre_lfq, protein  \n"
    ]
   },
@@ -1338,12 +1344,12 @@
       ],
       "text/plain": [
        "  precursor filename protein_group  ms1_int_sum\n",
-       "0    Prec_1        A             X      0.6\n",
-       "1    Prec_1        B             X      0.8\n",
-       "2    Prec_1        C             X      1.0\n",
-       "3    Prec_2        A             X      0.6\n",
-       "4    Prec_2        B             X      1.2\n",
-       "5    Prec_2        C             X      1.4"
+       "0    Prec_1        A             X          0.6\n",
+       "1    Prec_1        B             X          0.8\n",
+       "2    Prec_1        C             X          1.0\n",
+       "3    Prec_2        A             X          0.6\n",
+       "4    Prec_2        B             X          1.2\n",
+       "5    Prec_2        C             X          1.4"
       ]
      },
      "metadata": {},
@@ -1473,9 +1479,6 @@
     "\n",
     "    #Take the best precursor for protein quantification. .max()\n",
     "    grouped = df[[field, 'filename','precursor','protein_group']].groupby(['protein_group','filename','precursor']).max()\n",
-    "\n",
-    "    column_combinations = List()\n",
-    "    [column_combinations.append(_) for _ in combinations(range(len(files)), 2)]\n",
     "    \n",
     "    files = df['filename'].unique().tolist()\n",
     "    files.sort()\n",
 
@@ -1600,12 +1600,14 @@
     "\n",
     "@click.command(\n",
     "    \"import\",\n",
-    "    help=\"Import and convert raw data from vendor to `.ms_data.hdf` file.\",\n",
-    "    short_help=\"Import and convert raw data from vendor to `.ms_data.hdf` file.\"\n",
+    "    help=\"Import and convert raw data from vendor to `.ms_data.hdf` file with default settings.\",\n",
+    "    short_help=\"Import and convert raw data from vendor to `.ms_data.hdf` file with default settings.\"\n",
     ")\n",
     "@CLICK_SETTINGS_OPTION\n",
     "def cli_import(settings_file):\n",
-    "    settings = alphapept.settings.load_settings(settings_file)\n",
+    "    from alphapept.paths import DEFAULT_SETTINGS_PATH\n",
+    "    settings = alphapept.settings.load_settings_as_template(DEFAULT_SETTINGS_PATH)\n",
+    "    settings['experiment']['file_paths'] = [settings_file]\n",
     "    import_raw_data(settings)\n",
     "\n",
     "\n",
 
@@ -1,5 +1,5 @@
 Package: alphapept
-Version: 0.4.0
+Version: 0.4.1
 Architecture: all
 Maintainer: Mann Labs <[email protected]>
 Description: AlphaPept
 
@@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
 # Setting up the local package
 cd release/one_click_linux_gui
 # Make sure you include the required extra packages and always use the stable or very-stable options!
-pip install "../../dist/alphapept-0.4.0-py3-none-any.whl[stable,gui-stable]"
+pip install "../../dist/alphapept-0.4.1-py3-none-any.whl[stable,gui-stable]"
 
 # Creating the stand-alone pyinstaller folder
 pip install pyinstaller==4.2
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = "0.4.0"`
	`1`	`+__version__ = "0.4.1"`
`2`	`2`
`3`	`3`	`__requirements__ = {`
`4`	`4`	`"": "requirements/requirements.txt",`