Skip to content
This repository was archived by the owner on Aug 9, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ __pycache__/
*.py[cod]
*~
ipython/.ipynb_checkpoints
.pytest_cache

# C extensions
*.so
Expand Down
4 changes: 4 additions & 0 deletions doc/revscoring.datasources.session_oriented.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
revscoring.datasources.session_oriented
=======================================

.. automodule:: revscoring.datasources.session_oriented
1 change: 1 addition & 0 deletions docs/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Subpackages
revscoring.datasources
revscoring.datasources.meta
revscoring.datasources.revision_oriented
revscoring.datasources.session_oriented
revscoring.extractors
revscoring.features
revscoring.features.meta
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
import sys

import alabaster
import revscoring

# Resolve the parent of the directory that contains this file and put it at
# the front of the module search path, printing it for visibility.
# NOTE(review): presumably this lets `import revscoring` pick up the local
# checkout when building the docs -- confirm the import sits after this.
dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
print(dir_path)
sys.path.insert(0, dir_path)

import revscoring

# -- General configuration ------------------------------------------------

Expand Down
4 changes: 4 additions & 0 deletions docs/revscoring.datasources.session_oriented.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
revscoring.datasources.session_oriented
=======================================

.. automodule:: revscoring.datasources.session_oriented
6 changes: 2 additions & 4 deletions revscoring/datasources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
:class:`~revscoring.Datasource` processors are intended to
be :func:`~revscoring.dependencies.solve`'d as dependencies. The
provided datasources are split conceptually into a set of modules. Currently,
there is one module: :mod:`~revscoring.datasources.revision_oriented`.
there are two modules: :mod:`~revscoring.datasources.revision_oriented` and
:mod:`~revscoring.datasources.session_oriented`.

Meta-datasources
++++++++++++++++
Expand All @@ -22,9 +23,6 @@
++++++++++++
.. automodule:: revscoring.datasources.datasource




"""
from .datasource import Datasource

Expand Down
13 changes: 13 additions & 0 deletions revscoring/datasources/meta/expanders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from ..datasource import Datasource


class list_of(Datasource):
    """
    Calls a dependent once per position across a set of parallel lists of
    dependency values and gathers the results into a list.

    :Parameters:
        dependent : `callable`
            The dependent to call. It receives one value from each
            dependency list per call.
        depends_on : `list`
            The dependencies that generate the lists of values
        name : `str`
            A name for the datasource
    """

    def __init__(self, dependent, depends_on=None, name=None):
        formatted_name = self._format_name(name, [dependent])
        super().__init__(formatted_name, self.process, depends_on=depends_on)
        self.dependency = dependent

    def process(self, *lists_of_values):
        # zip() walks the lists in lockstep and stops at the shortest one.
        rows = zip(*lists_of_values)
        return [self.dependency(*row) for row in rows]
43 changes: 41 additions & 2 deletions revscoring/datasources/meta/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ class filter_keys(Datasource):

:Parameters:
table_datasource : :class:`revscoring.Datasource`
A datasource that generates a table including only the specified
keys
A datasource that generates a table with keys and values
keys : `iterable` ( `hashable` )
The keys to select from the table
name : `str`
Expand All @@ -138,3 +137,43 @@ def process(self, table):
new_table[key] = table[key]

return new_table


class first(Datasource):
    """
    Selects the item at index 0 of an indexable collection (e.g., a list)

    :Parameters:
        items_datasource : :class:`revscoring.Datasource`
            A datasource that generates an indexable sequence
        name : `str`
            A name for the datasource
    """

    def __init__(self, items_datasource, name=None):
        formatted_name = self._format_name(name, [items_datasource])
        super().__init__(
            formatted_name, self.process, depends_on=[items_datasource])

    def process(self, items):
        # Plain indexing: an empty list raises IndexError.
        head = items[0]
        return head


class last(Datasource):
    """
    Selects the item at index -1 of an indexable collection (e.g., a list)

    :Parameters:
        items_datasource : :class:`revscoring.Datasource`
            A datasource that generates an indexable sequence
        name : `str`
            A name for the datasource
    """

    def __init__(self, items_datasource, name=None):
        formatted_name = self._format_name(name, [items_datasource])
        super().__init__(
            formatted_name, self.process, depends_on=[items_datasource])

    def process(self, items):
        # Plain indexing: an empty list raises IndexError.
        tail = items[-1]
        return tail
181 changes: 181 additions & 0 deletions revscoring/datasources/session_oriented.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""
Implements a set of datasources oriented off of a single session (an ordered
list of revisions). This is useful for extracting features of edit and
article quality.

.. autodata:: revscoring.datasources.session_oriented.session

Supporting classes
++++++++++++++++++

.. autoclass:: revscoring.datasources.session_oriented.Session
:members:
:member-order: bysource

Supporting functions
++++++++++++++++++++

.. autofunction:: revscoring.datasources.session_oriented.list_of_tree

.. autofunction:: revscoring.datasources.session_oriented.list_of_ify
"""
import logging
import re
from functools import wraps
from inspect import getmembers, ismethod

from revscoring import Feature, FeatureVector
from revscoring.features.meta import expanders as feature_expanders

from ..dependencies import DependentSet
from .datasource import Datasource
from .meta import expanders as datasource_expanders
from .revision_oriented import Revision, User

logger = logging.getLogger(__name__)


def list_of_tree(dependent_set, rewrite_name=None, cache=None):
    """
    Rewrites a :class:`~revscoring.DependentSet` so that every
    :class:`~revscoring.Dependent` it names is replaced by the result of
    :func:`~revscoring.datasources.session_oriented.list_of_ify`.

    The conversion is applied to the set's dependents, recursively to its
    sub-sets (except those whose attribute name starts with "_"), and to its
    meta-dependent methods. Attributes are reassigned on `dependent_set`
    itself and that same object is returned.

    :Parameters:
        dependent_set : :class:`~revscoring.DependentSet`
            A dependent set to convert
        rewrite_name : function
            A function to apply to the dependent's name when re-creating it.
            When omitted, names are left unchanged.
        cache : dict(:class:`~revscoring.Feature` | :class:`~revscoring.FeatureVector` | :class:`~revscoring.Datasource`)
            A map of dependents that have already been converted. When
            omitted, a new empty map is used.

    :Returns:
        The `dependent_set` that was passed in, after modification
    """  # noqa
    logger.debug("Applying list_of_tree to {0}".format(dependent_set.name))
    if cache is None:
        cache = {}
    if rewrite_name is None:
        def rewrite_name(name):
            # Identity: keep every name as it is.
            return name

    # Replace each direct dependent with its list_of equivalent.
    for attr, dependent in dependent_set.dependents.items():
        setattr(dependent_set, attr,
                list_of_ify(dependent, rewrite_name, cache))

    # Recurse into sub-DependentSets, skipping underscore-prefixed attributes.
    for attr, sub_dependent_set in dependent_set.dependent_sets.items():
        if attr.startswith("_"):
            continue
        logger.debug("Running list_of_tree on {0}".format(attr))
        setattr(dependent_set, attr,
                list_of_tree(sub_dependent_set, rewrite_name, cache))

    # Wrap meta-dependents (methods that return a new dependent) so that
    # whatever they return is converted too.
    for attr, method in getmembers(dependent_set, ismethod):
        if hasattr(method, "meta_dependent"):
            setattr(dependent_set, attr,
                    meta_list_of_ify(method, rewrite_name, cache))

    return dependent_set


def list_of_ify(dependent, rewrite_name, cache):
    """
    Builds the "list of" equivalent of a :class:`~revscoring.Feature`,
    :class:`~revscoring.FeatureVector`, or :class:`~revscoring.Datasource`.
    The dependent's dependencies are converted first, recursively, and
    results are memoized in `cache` under the rewritten name.

    :Parameters:
        dependent : (:class:`~revscoring.Feature` | :class:`~revscoring.FeatureVector` | :class:`~revscoring.Datasource`)
            A dependent to convert
        rewrite_name : function
            A function to apply to the dependent's name when re-creating it.
        cache : dict(:class:`~revscoring.Feature` | :class:`~revscoring.FeatureVector` | :class:`~revscoring.Datasource`)
            A map of dependents that have already been converted.

    :Raises:
        `TypeError` when `dependent` is none of the supported types
    """  # noqa
    new_name = rewrite_name(dependent.name)

    # Memoized: a dependent with this rewritten name was already converted.
    if new_name in cache:
        logger.debug("list_of_ify {0} in the cache".format(dependent.name))
        return cache[new_name]

    logger.debug(
        "list_of_ify is modifying {0} into a list_of".format(dependent.name))
    new_dependencies = []
    for dependency in dependent.dependencies:
        new_dependencies.append(list_of_ify(dependency, rewrite_name, cache))

    # Datasources and FeatureVectors share the datasource expander.
    # FeatureVector is tested before Feature, preserving the original order
    # (presumably FeatureVector subclasses Feature -- verify).
    if isinstance(dependent, (Datasource, FeatureVector)):
        expander = datasource_expanders.list_of
    elif isinstance(dependent, Feature):
        expander = feature_expanders.list_of
    else:
        raise TypeError("Cannot convert type {0} into a list_of"
                        .format(type(dependent)))

    cache[new_name] = expander(
        dependent, depends_on=new_dependencies, name=new_name)
    return cache[new_name]


def meta_list_of_ify(method, rewrite_name, cache):
    """
    Wraps a method that returns a dependent so that the returned dependent
    is passed through
    :func:`~revscoring.datasources.session_oriented.list_of_ify`.

    :Parameters:
        method : function
            A callable that returns a dependent
        rewrite_name : function
            A function to apply to the dependent's name when re-creating it.
        cache : dict
            A map of dependents that have already been converted.
    """
    def list_of_method(*args, **kwargs):
        return list_of_ify(method(*args, **kwargs), rewrite_name, cache)

    # Carry over the wrapped method's name, docstring and other metadata.
    return wraps(method)(list_of_method)


def rewrite_name(name):
    """
    Rewrites a dependent name so that each "revision." segment that begins
    the name or follows a "." becomes "session.revisions.".
    """
    pattern = r"(^|\.)revision\."
    replacement = r"\1session.revisions."
    return re.sub(pattern, replacement, name)


class Session(DependentSet):
    """
    Represents a session -- an ordered list of revisions

    :Parameters:
        name : `str`
            The name of the dependent set. It is also used as the prefix of
            the name given to the `user` sub-set.
    """
    def __init__(self, name):
        super().__init__(name)

        # NOTE: the revisions prefix is the literal "session.revisions" and
        # does not derive from `name`, unlike the user prefix below.
        revision_tree = Revision(
            "session.revisions",
            include_page_creation=True,
            include_content=True,
            include_user=False,
            include_page_suggested=True)
        self.revisions = list_of_tree(revision_tree, rewrite_name=rewrite_name)
        """
        :class:`revscoring.datasources.revision_oriented.Revision`: modified by
        :func:`~revscoring.datasources.session_oriented.list_of_tree()`
        """

        user_name = name + ".user"
        self.user = User(
            user_name, include_info=True, include_last_revision=True)
        """
        :class:`revscoring.datasources.revision_oriented.User`
        """

# Module-level Session dependent set, constructed with the name "session".
# NOTE(review): the bullet list in the string below is flat as it appears
# here; it presumably was nested to mirror the attribute hierarchy
# (session -> revisions / user -> ...) -- confirm against the rendered docs.
session = Session("session")
"""
Represents the session of interest. Implements this structure:

* session: :class:`~revscoring.datasources.session_oriented.Session`
* revisions: :class:`~revscoring.datasources.revision_oriented.Revision`
* diff: :class:`~revscoring.datasources.revision_oriented.Diff`
* page: :class:`~revscoring.datasources.revision_oriented.Page`
* namespace: :class:`~revscoring.datasources.revision_oriented.Namespace`
* creation: :class:`~revscoring.datasources.revision_oriented.Revision`
* parent: :class:`~revscoring.datasources.revision_oriented.Revision`
* user: :class:`~revscoring.datasources.revision_oriented.User`
* user: :class:`~revscoring.datasources.revision_oriented.User`
* info: :class:`~revscoring.datasources.revision_oriented.UserInfo`
* last_revision: :class:`~revscoring.datasources.revision_oriented.Revision`
""" # noqa
Loading