-
Notifications
You must be signed in to change notification settings - Fork 215
fix: Umap duplicate index #489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 5 commits
db40c9e
8f3b839
e2f2b6c
6141cd3
aca1c0c
f8bcde0
394e877
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -132,13 +132,13 @@ def xls(self, xls_or_url, source='default', verbose=None): | |
p = print if verbose else (lambda x: 1) | ||
|
||
# source is either undefined, a string, or a (partial) bindings object | ||
if type(source) == str and source not in self.source_to_mappings: | ||
if type(source) is str and source not in self.source_to_mappings: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Normally these are `isinstance(x, Y)` |
||
p('Unknown source type', source) | ||
raise Exception('Unknown nodexl source type %s' % str(source)) | ||
bindings = self.source_to_mappings[source] if type(source) == str else source | ||
bindings = self.source_to_mappings[source] if type(source) is str else source | ||
|
||
p('Fetching...') | ||
xls = pd.ExcelFile(xls_or_url) if type(xls_or_url) == str else xls_or_url | ||
xls = pd.ExcelFile(xls_or_url) if type(xls_or_url) is str else xls_or_url | ||
|
||
p('Formatting edges') | ||
edges_df = self.xls_to_edges_df(xls, bindings['edges_df_transformer']) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -587,12 +587,13 @@ def umap( | |
|
||
if kind == "nodes": | ||
index = res._nodes.index | ||
|
||
if res._node is None: | ||
logger.debug("-Writing new node name") | ||
res._nodes[config.IMPLICIT_NODE_ID] = range(len(res._nodes)) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Clever. Will this work with cudf? Will this break somehow downstream if .edges is bound and the user was using string name IDs for src/dst? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Strange, I thought this was already done this way... |
||
res = res.nodes( # type: ignore | ||
res._nodes.reset_index(drop=True) | ||
.reset_index() | ||
.rename(columns={"index": config.IMPLICIT_NODE_ID}), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not just call it |
||
res._nodes, | ||
config.IMPLICIT_NODE_ID, | ||
) | ||
res._nodes.index = index | ||
|
@@ -719,7 +720,7 @@ def _bind_xy_from_umap( | |
else: | ||
emb = res._edge_embedding | ||
|
||
if type(df) == type(emb): | ||
if type(df) is type(emb): | ||
df[x_name] = emb.values.T[0] | ||
df[y_name] = emb.values.T[1] | ||
elif isinstance(df, pd.DataFrame) and 'cudf.core.dataframe' in str(getmodule(emb)): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Normally these are `isinstance(x, Y)`.