From 4516c4962c85d3eb43b4ff142531a3bc83da84e9 Mon Sep 17 00:00:00 2001
From: David Ormsbee <dave@edx.org>
Date: Thu, 26 Feb 2015 18:11:34 -0500
Subject: [PATCH] Optimize grading/progress page to reduce database queries
 (cache max scores).

The progress page did a number of things that make performance terrible for
courses with large numbers of problems, particularly if those problems are
customresponse CapaModule problems that need to be executed via codejail.

The grading code takes pains to not instantiate student state and execute the
problem code. If a student has answered the question, the max score is stored
in StudentModule. However, if the student hasn't attempted the question yet, we
have to run the problem code just to call .max_score() on it. This is necessary
in grade() if the student has answered other problems in the assignment (so we
can know what to divide by). This is always necessary to know in
progress_summary() because we list out every problem there. Code execution can
be especially slow if the problems need to invoke codejail.

To address this, we create a MaxScoresCache that will cache the max raw score
possible for every problem. We select the cache keys so that it will
automatically become invalidated when a new version of the course is published.

The fundamental assumption here is that a problem cannot have two different
max score values for two unscored students. A problem *can* score two students
differently such that they have different max scores. So Carlos can have 2/3 on
a problem, while Lyla gets 3/4. But if neither Carlos nor Lyla has ever
interacted with the problem (i.e. they're just seeing it on their progress
page), they must both see 0/4 -- it cannot be the case that Carlos sees 0/3 and
Lyla sees 0/4.

We used to load all student state into two separate FieldDataCache instances,
after which we do a bunch of individual queries for scored items. Part of this
split-up was done because of locking problems, but I think we might have gotten
overzealous with our manual transaction hammer.

In this commit, we consolidate all state access in grade() and progress()
to use one shared FieldDataCache. We also use a filter so that we only pull
back StudentModule state for things that might possibly affect the grade --
items that either have scores or have children.

Because some older XModules do work in their __init__() methods (like Video),
instantiating them takes time, particularly on large courses. This commit also
changes the code that fetches the grading_context to filter out children that
can't possibly affect the grade.

Finally, we introduce a ScoresClient that also tries to fetch score
information all at once, instead of in separate queries. Technically, we are
fetching this information redundantly, but that's because the state and score
interfaces are being teased apart as we move forward. Still, this only
amounts to one extra SQL query, and has very little impact on performance
overall.

Much thanks to @adampalay -- his hackathon work in #7168 formed the basis of
this.

https://openedx.atlassian.net/browse/CSM-17
---
 common/lib/xmodule/xmodule/course_module.py   |  16 +-
 .../tests/test_field_override_performance.py  |  54 +++---
 lms/djangoapps/courseware/grades.py           | 161 +++++++++++-------
 lms/djangoapps/courseware/model_data.py       |  64 ++++++-
 lms/djangoapps/courseware/models.py           |   5 +-
 .../courseware/tests/test_grades.py           |   7 +-
 .../courseware/tests/test_model_data.py       |   3 +-
 .../tests/test_submitting_problems.py         |  43 +++++
 lms/djangoapps/courseware/views.py            |  13 +-
 lms/envs/common.py                            |   6 +-
 10 files changed, 270 insertions(+), 102 deletions(-)

diff --git a/common/lib/xmodule/xmodule/course_module.py b/common/lib/xmodule/xmodule/course_module.py
index 5a5066c34433..ecf7f52de7b4 100644
--- a/common/lib/xmodule/xmodule/course_module.py
+++ b/common/lib/xmodule/xmodule/course_module.py
@@ -20,6 +20,7 @@
 from xmodule.mixin import LicenseMixin
 import json
 
+from xblock.core import XBlock
 from xblock.fields import Scope, List, String, Dict, Boolean, Integer, Float
 from .fields import Date
 from django.utils.timezone import UTC
@@ -1315,11 +1316,15 @@ def grading_context(self):
         except UndefinedContext:
             module = self
 
+        def possibly_scored(usage_key):
+            """Can this XBlock type can have a score or children?"""
+            return usage_key.block_type in self.block_types_affecting_grading
+
         all_descriptors = []
         graded_sections = {}
 
         def yield_descriptor_descendents(module_descriptor):
-            for child in module_descriptor.get_children():
+            for child in module_descriptor.get_children(usage_key_filter=possibly_scored):
                 yield child
                 for module_descriptor in yield_descriptor_descendents(child):
                     yield module_descriptor
@@ -1345,6 +1350,15 @@ def yield_descriptor_descendents(module_descriptor):
         return {'graded_sections': graded_sections,
                 'all_descriptors': all_descriptors, }
 
+    @lazy
+    def block_types_affecting_grading(self):
+        """Return all block types that could impact grading (i.e. scored, or having children)."""
+        return frozenset(
+            cat for (cat, xblock_class) in XBlock.load_classes() if (
+                getattr(xblock_class, 'has_score', False) or getattr(xblock_class, 'has_children', False)
+            )
+        )
+
     @staticmethod
     def make_id(org, course, url_name):
         return '/'.join([org, course, url_name])
diff --git a/lms/djangoapps/ccx/tests/test_field_override_performance.py b/lms/djangoapps/ccx/tests/test_field_override_performance.py
index afb013e3c16a..e012523c2858 100644
--- a/lms/djangoapps/ccx/tests/test_field_override_performance.py
+++ b/lms/djangoapps/ccx/tests/test_field_override_performance.py
@@ -29,7 +29,11 @@
 
 @attr('shard_1')
 @mock.patch.dict(
-    'django.conf.settings.FEATURES', {'ENABLE_XBLOCK_VIEW_ENDPOINT': True}
+    'django.conf.settings.FEATURES',
+    {
+        'ENABLE_XBLOCK_VIEW_ENDPOINT': True,
+        'ENABLE_MAX_SCORE_CACHE': False,
+    }
 )
 @ddt.ddt
 class FieldOverridePerformanceTestCase(ProceduralCourseTestMixin,
@@ -173,18 +177,18 @@ class TestFieldOverrideMongoPerformance(FieldOverridePerformanceTestCase):
 
     TEST_DATA = {
         # (providers, course_width, enable_ccx): # of sql queries, # of mongo queries, # of xblocks
-        ('no_overrides', 1, True): (27, 7, 14),
-        ('no_overrides', 2, True): (135, 7, 85),
-        ('no_overrides', 3, True): (595, 7, 336),
-        ('ccx', 1, True): (27, 7, 14),
-        ('ccx', 2, True): (135, 7, 85),
-        ('ccx', 3, True): (595, 7, 336),
-        ('no_overrides', 1, False): (27, 7, 14),
-        ('no_overrides', 2, False): (135, 7, 85),
-        ('no_overrides', 3, False): (595, 7, 336),
-        ('ccx', 1, False): (27, 7, 14),
-        ('ccx', 2, False): (135, 7, 85),
-        ('ccx', 3, False): (595, 7, 336),
+        ('no_overrides', 1, True): (23, 7, 14),
+        ('no_overrides', 2, True): (68, 7, 85),
+        ('no_overrides', 3, True): (263, 7, 336),
+        ('ccx', 1, True): (23, 7, 14),
+        ('ccx', 2, True): (68, 7, 85),
+        ('ccx', 3, True): (263, 7, 336),
+        ('no_overrides', 1, False): (23, 7, 14),
+        ('no_overrides', 2, False): (68, 7, 85),
+        ('no_overrides', 3, False): (263, 7, 336),
+        ('ccx', 1, False): (23, 7, 14),
+        ('ccx', 2, False): (68, 7, 85),
+        ('ccx', 3, False): (263, 7, 336),
     }
 
 
@@ -196,16 +200,16 @@ class TestFieldOverrideSplitPerformance(FieldOverridePerformanceTestCase):
     __test__ = True
 
     TEST_DATA = {
-        ('no_overrides', 1, True): (27, 4, 9),
-        ('no_overrides', 2, True): (135, 19, 54),
-        ('no_overrides', 3, True): (595, 84, 215),
-        ('ccx', 1, True): (27, 4, 9),
-        ('ccx', 2, True): (135, 19, 54),
-        ('ccx', 3, True): (595, 84, 215),
-        ('no_overrides', 1, False): (27, 4, 9),
-        ('no_overrides', 2, False): (135, 19, 54),
-        ('no_overrides', 3, False): (595, 84, 215),
-        ('ccx', 1, False): (27, 4, 9),
-        ('ccx', 2, False): (135, 19, 54),
-        ('ccx', 3, False): (595, 84, 215),
+        ('no_overrides', 1, True): (23, 4, 9),
+        ('no_overrides', 2, True): (68, 19, 54),
+        ('no_overrides', 3, True): (263, 84, 215),
+        ('ccx', 1, True): (23, 4, 9),
+        ('ccx', 2, True): (68, 19, 54),
+        ('ccx', 3, True): (263, 84, 215),
+        ('no_overrides', 1, False): (23, 4, 9),
+        ('no_overrides', 2, False): (68, 19, 54),
+        ('no_overrides', 3, False): (263, 84, 215),
+        ('ccx', 1, False): (23, 4, 9),
+        ('ccx', 2, False): (68, 19, 54),
+        ('ccx', 3, False): (263, 84, 215),
     }
diff --git a/lms/djangoapps/courseware/grades.py b/lms/djangoapps/courseware/grades.py
index db141547a06b..210b9b7b7b82 100644
--- a/lms/djangoapps/courseware/grades.py
+++ b/lms/djangoapps/courseware/grades.py
@@ -15,7 +15,7 @@
 import dogstats_wrapper as dog_stats_api
 
 from courseware import courses
-from courseware.model_data import FieldDataCache
+from courseware.model_data import FieldDataCache, ScoresClient
 from student.models import anonymous_id_for_user
 from util.module_utils import yield_dynamic_descriptor_descendants
 from xmodule import graders
@@ -27,9 +27,9 @@
 from submissions import api as sub_api  # installed from the edx-submissions repository
 from opaque_keys import InvalidKeyError
 from opaque_keys.edx.keys import CourseKey
-
 from openedx.core.djangoapps.signals.signals import GRADES_UPDATED
 
+
 log = logging.getLogger("edx.courseware")
 
 
@@ -201,6 +201,7 @@ def field_data_cache_for_grading(course, user):
         descriptor_filter=descriptor_filter
     )
 
+
 def answer_distributions(course_key):
     """
     Given a course_key, return answer distributions in the form of a dictionary
@@ -293,14 +294,14 @@ def url_and_display_name(usage_key):
 
 
 @transaction.commit_manually
-def grade(student, request, course, keep_raw_scores=False):
+def grade(student, request, course, keep_raw_scores=False, field_data_cache=None, scores_client=None):
     """
     Wraps "_grade" with the manual_transaction context manager just in case
     there are unanticipated errors.
     Send a signal to update the minimum grade requirement status.
     """
     with manual_transaction():
-        grade_summary = _grade(student, request, course, keep_raw_scores)
+        grade_summary = _grade(student, request, course, keep_raw_scores, field_data_cache, scores_client)
         responses = GRADES_UPDATED.send_robust(
             sender=None,
             username=student.username,
@@ -315,7 +316,7 @@ def grade(student, request, course, keep_raw_scores=False):
         return grade_summary
 
 
-def _grade(student, request, course, keep_raw_scores):
+def _grade(student, request, course, keep_raw_scores, field_data_cache, scores_client):
     """
     Unwrapped version of "grade"
 
@@ -336,15 +337,25 @@ def _grade(student, request, course, keep_raw_scores):
 
     More information on the format is in the docstring for CourseGrader.
     """
-    grading_context = course.grading_context
-    raw_scores = []
+    if field_data_cache is None:
+        with manual_transaction():
+            field_data_cache = field_data_cache_for_grading(course, student)
+    if scores_client is None:
+        scores_client = ScoresClient.from_field_data_cache(field_data_cache)
 
     # Dict of item_ids -> (earned, possible) point tuples. This *only* grabs
     # scores that were registered with the submissions API, which for the moment
     # means only openassessment (edx-ora2)
-    submissions_scores = sub_api.get_scores(
-        course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id)
-    )
+    submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id))
+    max_scores_cache = MaxScoresCache.create_for_course(course)
+    # For the moment, we have to get scorable_locations from field_data_cache
+    # and not from scores_client, because scores_client is ignorant of things
+    # in the submissions API. As a further refactoring step, submissions should
+    # be hidden behind the ScoresClient.
+    max_scores_cache.fetch_from_remote(field_data_cache.scorable_locations)
+
+    grading_context = course.grading_context
+    raw_scores = []
 
     totaled_scores = {}
     # This next complicated loop is just to collect the totaled_scores, which is
@@ -372,13 +383,10 @@ def _grade(student, request, course, keep_raw_scores):
                 )
 
             if not should_grade_section:
-                with manual_transaction():
-                    should_grade_section = StudentModule.objects.filter(
-                        student=student,
-                        module_state_key__in=[
-                            descriptor.location for descriptor in section['xmoduledescriptors']
-                        ]
-                    ).exists()
+                should_grade_section = any(
+                    descriptor.location in scores_client
+                    for descriptor in section['xmoduledescriptors']
+                )
 
             # If we haven't seen a single problem in the section, we don't have
             # to grade it at all! We can assume 0%
@@ -389,24 +397,24 @@ def create_module(descriptor):
                     '''creates an XModule instance given a descriptor'''
                     # TODO: We need the request to pass into here. If we could forego that, our arguments
                     # would be simpler
-                    with manual_transaction():
-                        field_data_cache = FieldDataCache([descriptor], course.id, student)
                     return get_module_for_descriptor(
                         student, request, descriptor, field_data_cache, course.id, course=course
                     )
 
-                for module_descriptor in yield_dynamic_descriptor_descendants(
-                        section_descriptor, student.id, create_module
-                ):
-
+                descendants = yield_dynamic_descriptor_descendants(section_descriptor, student.id, create_module)
+                for module_descriptor in descendants:
                     (correct, total) = get_score(
-                        course.id, student, module_descriptor, create_module, scores_cache=submissions_scores
-
+                        student,
+                        module_descriptor,
+                        create_module,
+                        scores_client,
+                        submissions_scores,
+                        max_scores_cache,
                     )
                     if correct is None and total is None:
                         continue
 
-                    if settings.GENERATE_PROFILE_SCORES:  	# for debugging!
+                    if settings.GENERATE_PROFILE_SCORES:    # for debugging!
                         if total > 1:
                             correct = random.randrange(max(total - 2, 1), total + 1)
                         else:
@@ -432,6 +440,7 @@ def create_module(descriptor):
                     section_name,
                     section_location=section_descriptor.location
                 )
+
                 if keep_raw_scores:
                     raw_scores += scores
             else:
@@ -458,11 +467,14 @@ def create_module(descriptor):
 
     letter_grade = grade_for_percentage(course.grade_cutoffs, grade_summary['percent'])
     grade_summary['grade'] = letter_grade
-    grade_summary['totaled_scores'] = totaled_scores  	# make this available, eg for instructor download & debugging
+    grade_summary['totaled_scores'] = totaled_scores   # make this available, eg for instructor download & debugging
     if keep_raw_scores:
         # way to get all RAW scores out to instructor
         # so grader can be double-checked
         grade_summary['raw_scores'] = raw_scores
+
+    max_scores_cache.push_to_remote()
+
     return grade_summary
 
 
@@ -516,7 +528,7 @@ def get_weighted_scores(student, course, field_data_cache=None, scores_client=No
 # TODO: This method is not very good. It was written in the old course style and
 # then converted over and performance is not good. Once the progress page is redesigned
 # to not have the progress summary this method should be deleted (so it won't be copied).
-def _progress_summary(student, request, course):
+def _progress_summary(student, request, course, field_data_cache=None, scores_client=None):
     """
     Unwrapped version of "progress_summary".
 
@@ -538,21 +550,26 @@ def _progress_summary(student, request, course):
 
     """
     with manual_transaction():
-        field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
-            course.id, student, course, depth=None
-        )
-        # TODO: We need the request to pass into here. If we could
-        # forego that, our arguments would be simpler
+        if field_data_cache is None:
+            field_data_cache = field_data_cache_for_grading(course, student)
+        if scores_client is None:
+            scores_client = ScoresClient.from_field_data_cache(field_data_cache)
+
         course_module = get_module_for_descriptor(
             student, request, course, field_data_cache, course.id, course=course
         )
         if not course_module:
-            # This student must not have access to the course.
             return None
 
         course_module = getattr(course_module, '_x_module', course_module)
 
     submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id))
+    max_scores_cache = MaxScoresCache.create_for_course(course)
+    # For the moment, we have to get scorable_locations from field_data_cache
+    # and not from scores_client, because scores_client is ignorant of things
+    # in the submissions API. As a further refactoring step, submissions should
+    # be hidden behind the ScoresClient.
+    max_scores_cache.fetch_from_remote(field_data_cache.scorable_locations)
 
     chapters = []
     locations_to_children = defaultdict(list)
@@ -580,7 +597,12 @@ def _progress_summary(student, request, course):
                 ):
                     locations_to_children[module_descriptor.parent].append(module_descriptor.location)
                     (correct, total) = get_score(
-                        course.id, student, module_descriptor, module_creator, scores_cache=submissions_scores
+                        student,
+                        module_descriptor,
+                        module_creator,
+                        scores_client,
+                        submissions_scores,
+                        max_scores_cache,
                     )
                     if correct is None and total is None:
                         continue
@@ -623,7 +645,15 @@ def _progress_summary(student, request, course):
     return ProgressSummary(chapters, locations_to_weighted_scores, locations_to_children)
 
 
-def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=None):
+def weighted_score(raw_correct, raw_total, weight):
+    """Return a tuple that represents the weighted (correct, total) score."""
+    # If there is no weighting, or weighting can't be applied, return input.
+    if weight is None or raw_total == 0:
+        return (raw_correct, raw_total)
+    return (float(raw_correct) * weight / raw_total, float(weight))
+
+
+def get_score(user, problem_descriptor, module_creator, scores_client, submissions_scores_cache, max_scores_cache):
     """
     Return the score for a user on a problem, as a tuple (correct, total).
     e.g. (5,7) if you got 5 out of 7 points.
@@ -633,19 +663,21 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=
 
     user: a Student object
     problem_descriptor: an XModuleDescriptor
+    scores_client: an initialized ScoresClient
     module_creator: a function that takes a descriptor, and returns the corresponding XModule for this user.
            Can return None if user doesn't have access, or if something else went wrong.
-    scores_cache: A dict of location names to (earned, possible) point tuples.
+    submissions_scores_cache: A dict of location names to (earned, possible) point tuples.
            If an entry is found in this cache, it takes precedence.
+    max_scores_cache: a MaxScoresCache
     """
-    scores_cache = scores_cache or {}
+    submissions_scores_cache = submissions_scores_cache or {}
 
     if not user.is_authenticated():
         return (None, None)
 
     location_url = problem_descriptor.location.to_deprecated_string()
-    if location_url in scores_cache:
-        return scores_cache[location_url]
+    if location_url in submissions_scores_cache:
+        return submissions_scores_cache[location_url]
 
     # some problems have state that is updated independently of interaction
     # with the LMS, so they need to always be scored. (E.g. foldit.)
@@ -663,22 +695,27 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=
         # These are not problems, and do not have a score
         return (None, None)
 
-    try:
-        student_module = StudentModule.objects.get(
-            student=user,
-            course_id=course_id,
-            module_state_key=problem_descriptor.location
-        )
-    except StudentModule.DoesNotExist:
-        student_module = None
-
-    if student_module is not None and student_module.max_grade is not None:
-        correct = student_module.grade if student_module.grade is not None else 0
-        total = student_module.max_grade
+    # Check the score that comes from the ScoresClient (out of CSM).
+    # If an entry exists and has a total associated with it, we trust that
+    # value. This is important for cases where a student might have seen an
+    # older version of the problem -- they're still graded on what was possible
+    # when they tried the problem, not what it's worth now.
+    score = scores_client.get(problem_descriptor.location)
+    cached_max_score = max_scores_cache.get(problem_descriptor.location)
+    if score and score.total is not None:
+        # We have a valid score, just use it.
+        correct = score.correct if score.correct is not None else 0.0
+        total = score.total
+    elif cached_max_score is not None and settings.FEATURES.get("ENABLE_MAX_SCORE_CACHE"):
+        # We don't have a valid score entry but we know from our cache what the
+        # max possible score is, so they've earned 0.0 / cached_max_score
+        correct = 0.0
+        total = cached_max_score
     else:
-        # If the problem was not in the cache, or hasn't been graded yet,
-        # we need to instantiate the problem.
-        # Otherwise, the max score (cached in student_module) won't be available
+        # This means we don't have a valid score entry and we don't have a
+        # cached_max_score on hand. We know they've earned 0.0 points on this,
+        # but we need to instantiate the module (i.e. load student state) in
+        # order to find out how much it was worth.
         problem = module_creator(problem_descriptor)
         if problem is None:
             return (None, None)
@@ -690,17 +727,11 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=
         # In which case total might be None
         if total is None:
             return (None, None)
+        else:
+            # add location to the max score cache
+            max_scores_cache.set(problem_descriptor.location, total)
 
-    # Now we re-weight the problem, if specified
-    weight = problem_descriptor.weight
-    if weight is not None:
-        if total == 0:
-            log.exception("Cannot reweight a problem with zero total points. Problem: " + str(student_module))
-            return (correct, total)
-        correct = correct * weight / total
-        total = weight
-
-    return (correct, total)
+    return weighted_score(correct, total, problem_descriptor.weight)
 
 
 @contextmanager
diff --git a/lms/djangoapps/courseware/model_data.py b/lms/djangoapps/courseware/model_data.py
index a0e217ede7ce..faa5fdbad3a2 100644
--- a/lms/djangoapps/courseware/model_data.py
+++ b/lms/djangoapps/courseware/model_data.py
@@ -23,7 +23,7 @@
 
 import json
 from abc import abstractmethod, ABCMeta
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 from .models import (
     StudentModule,
     XModuleUserStateSummaryField,
@@ -741,6 +741,7 @@ def __init__(self, descriptors, course_id, user, select_for_update=False, asides
                 self.course_id,
             ),
         }
+        self.scorable_locations = set()
         self.add_descriptors_to_cache(descriptors)
 
     def add_descriptors_to_cache(self, descriptors):
@@ -748,6 +749,7 @@ def add_descriptors_to_cache(self, descriptors):
         Add all `descriptors` to this FieldDataCache.
         """
         if self.user.is_authenticated():
+            self.scorable_locations.update(desc.location for desc in descriptors if desc.has_score)
             for scope, fields in self._fields_to_cache(descriptors).items():
                 if scope not in self.cache:
                     continue
@@ -955,3 +957,63 @@ def last_modified(self, key):
 
     def __len__(self):
         return sum(len(cache) for cache in self.cache.values())
+
+
+class ScoresClient(object):
+    """
+    Basic client interface for retrieving Score information.
+
+    Eventually, this should read and write scores, but at the moment it only
+    handles the read side of things.
+    """
+    Score = namedtuple('Score', 'correct total')
+
+    def __init__(self, course_key, user_id):
+        """Basic constructor. from_field_data_cache() is more appopriate for most uses."""
+        self.course_key = course_key
+        self.user_id = user_id
+        self._locations_to_scores = {}
+        self._has_fetched = False
+
+    def __contains__(self, location):
+        """Return True if we have a score for this location."""
+        return location in self._locations_to_scores
+
+    def fetch_scores(self, locations):
+        """Grab score information."""
+        scores_qset = StudentModule.objects.filter(
+            student_id=self.user_id,
+            course_id=self.course_key,
+            module_state_key__in=set(locations),
+        )
+        # Locations in StudentModule don't necessarily have course key info
+        # attached to them (since old mongo identifiers don't include runs).
+        # So we have to add that info back in before we put it into our lookup.
+        self._locations_to_scores.update({
+            UsageKey.from_string(location).map_into_course(self.course_key): self.Score(correct, total)
+            for location, correct, total
+            in scores_qset.values_list('module_state_key', 'grade', 'max_grade')
+        })
+        self._has_fetched = True
+
+    def get(self, location):
+        """
+        Get the score for a given location, if it exists.
+
+        If we don't have a score for that location, return `None`. Note that as
+        convention, you should be passing in a location with full course run
+        information.
+        """
+        if not self._has_fetched:
+            raise ValueError(
+                "Tried to fetch location {} from ScoresClient before fetch_scores() has run."
+                .format(location)
+            )
+        return self._locations_to_scores.get(location)
+
+    @classmethod
+    def from_field_data_cache(cls, fd_cache):
+        """Create a ScoresClient from a populated FieldDataCache."""
+        client = cls(fd_cache.course_id, fd_cache.user.id)
+        client.fetch_scores(fd_cache.scorable_locations)
+        return client
diff --git a/lms/djangoapps/courseware/models.py b/lms/djangoapps/courseware/models.py
index ac7b72520bd3..32a01e9e6ea5 100644
--- a/lms/djangoapps/courseware/models.py
+++ b/lms/djangoapps/courseware/models.py
@@ -133,7 +133,10 @@ def __repr__(self):
         return 'StudentModule<%r>' % ({
             'course_id': self.course_id,
             'module_type': self.module_type,
-            'student': self.student.username,  # pylint: disable=no-member
+            # We use the student_id instead of username to avoid a database hop.
+            # This can actually matter in cases where we're logging many of
+            # these (e.g. on a broken progress page).
+            'student_id': self.student_id,  # pylint: disable=no-member
             'module_state_key': self.module_state_key,
             'state': str(self.state)[:20],
         },)
diff --git a/lms/djangoapps/courseware/tests/test_grades.py b/lms/djangoapps/courseware/tests/test_grades.py
index b04928ab93bb..a8a26c0414ef 100644
--- a/lms/djangoapps/courseware/tests/test_grades.py
+++ b/lms/djangoapps/courseware/tests/test_grades.py
@@ -7,6 +7,9 @@
 from django.test.client import RequestFactory
 
 from mock import patch, MagicMock
+from django.test.client import RequestFactory
+
+from mock import patch
 from nose.plugins.attrib import attr
 from opaque_keys.edx.locations import SlashSeparatedCourseKey
 from opaque_keys.edx.locator import CourseLocator, BlockUsageLocator
@@ -14,7 +17,8 @@
 from courseware.grades import grade, iterate_grades_for
 from courseware.grades import field_data_cache_for_grading, grade, iterate_grades_for, MaxScoresCache, ProgressSummary
 from student.tests.factories import UserFactory
-from xmodule.modulestore.tests.factories import CourseFactory
+from student.models import CourseEnrollment
+from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
 from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
 
 
@@ -319,4 +323,3 @@ def test_score_leaf_no_score(self):
         earned, possible = self.progress_summary.score_for_module(self.loc_m)
         self.assertEqual(earned, 0)
         self.assertEqual(possible, 0)
-
diff --git a/lms/djangoapps/courseware/tests/test_model_data.py b/lms/djangoapps/courseware/tests/test_model_data.py
index 0f87c49de357..a123309f85b1 100644
--- a/lms/djangoapps/courseware/tests/test_model_data.py
+++ b/lms/djangoapps/courseware/tests/test_model_data.py
@@ -6,8 +6,7 @@
 from nose.plugins.attrib import attr
 from functools import partial
 
-from courseware.model_data import DjangoKeyValueStore
-from courseware.model_data import InvalidScopeError, FieldDataCache
+from courseware.model_data import DjangoKeyValueStore, FieldDataCache, InvalidScopeError
 from courseware.models import StudentModule
 from courseware.models import XModuleStudentInfoField, XModuleStudentPrefsField
 
diff --git a/lms/djangoapps/courseware/tests/test_submitting_problems.py b/lms/djangoapps/courseware/tests/test_submitting_problems.py
index e64346de7a10..d00871a99079 100644
--- a/lms/djangoapps/courseware/tests/test_submitting_problems.py
+++ b/lms/djangoapps/courseware/tests/test_submitting_problems.py
@@ -109,6 +109,15 @@ def submit_question_answer(self, problem_url_name, responses):
 
         return resp
 
+    def look_at_question(self, problem_url_name):
+        """
+        Create state for a problem, but don't answer it
+        """
+        location = self.problem_location(problem_url_name)
+        modx_url = self.modx_url(location, "problem_get")
+        resp = self.client.get(modx_url)
+        return resp
+
     def reset_question_answer(self, problem_url_name):
         """
         Reset specified problem for current user.
@@ -487,6 +496,33 @@ def test_show_answer_doesnt_write_to_csm(self):
         current_count = csmh.count()
         self.assertEqual(current_count, 3)
 
+    def test_grade_with_max_score_cache(self):
+        """
+        Tests that the max score cache is populated after a grading run
+        and that the results of grading runs before and after the cache
+        warms are the same.
+        """
+        self.basic_setup()
+        self.submit_question_answer('p1', {'2_1': 'Correct'})
+        self.look_at_question('p2')
+        self.assertTrue(
+            StudentModule.objects.filter(
+                module_state_key=self.problem_location('p2')
+            ).exists()
+        )
+        location_to_cache = unicode(self.problem_location('p2'))
+        max_scores_cache = grades.MaxScoresCache.create_for_course(self.course)
+
+        # problem isn't in the cache
+        max_scores_cache.fetch_from_remote([location_to_cache])
+        self.assertIsNone(max_scores_cache.get(location_to_cache))
+        self.check_grade_percent(0.33)
+
+        # problem is in the cache
+        max_scores_cache.fetch_from_remote([location_to_cache])
+        self.assertIsNotNone(max_scores_cache.get(location_to_cache))
+        self.check_grade_percent(0.33)
+
     def test_none_grade(self):
         """
         Check grade is 0 to begin with.
@@ -504,6 +540,13 @@ def test_b_grade_exact(self):
         self.check_grade_percent(0.33)
         self.assertEqual(self.get_grade_summary()['grade'], 'B')
 
+    @patch.dict("django.conf.settings.FEATURES", {"ENABLE_MAX_SCORE_CACHE": False})
+    def test_grade_no_max_score_cache(self):
+        """
+        Tests grading when the max score cache is disabled
+        """
+        self.test_b_grade_exact()
+
     def test_b_grade_above(self):
         """
         Check grade between cutoffs.
diff --git a/lms/djangoapps/courseware/views.py b/lms/djangoapps/courseware/views.py
index d7275c7fd52e..2162104f93aa 100644
--- a/lms/djangoapps/courseware/views.py
+++ b/lms/djangoapps/courseware/views.py
@@ -44,7 +44,7 @@
     is_user_eligible_for_credit,
     is_credit_course
 )
-from courseware.model_data import FieldDataCache
+from courseware.model_data import FieldDataCache, ScoresClient
 from .module_render import toc_for_course, get_module_for_descriptor, get_module, get_module_by_usage_id
 from .entrance_exams import (
     course_has_entrance_exam,
@@ -1054,10 +1054,15 @@ def _progress(request, course_key, student_id):
     # The pre-fetching of groups is done to make auth checks not require an
     # additional DB lookup (this kills the Progress page in particular).
     student = User.objects.prefetch_related("groups").get(id=student.id)
-
-    courseware_summary = grades.progress_summary(student, request, course)
+    field_data_cache = grades.field_data_cache_for_grading(course, student)
+    scores_client = ScoresClient.from_field_data_cache(field_data_cache)
+    courseware_summary = grades.progress_summary(
+        student, request, course, field_data_cache=field_data_cache, scores_client=scores_client
+    )
+    grade_summary = grades.grade(
+        student, request, course, field_data_cache=field_data_cache, scores_client=scores_client
+    )
     studio_url = get_studio_url(course, 'settings/grading')
-    grade_summary = grades.grade(student, request, course)
 
     if courseware_summary is None:
         #This means the student didn't have access to the course (which the instructor requested)
diff --git a/lms/envs/common.py b/lms/envs/common.py
index ddfcd2c858e0..b45c3bfb4865 100644
--- a/lms/envs/common.py
+++ b/lms/envs/common.py
@@ -40,6 +40,7 @@
 from .discussionsettings import *
 import dealer.git
 from xmodule.modulestore.modulestore_settings import update_module_store_settings
+from xmodule.modulestore.edit_info import EditInfoMixin
 from xmodule.mixin import LicenseMixin
 from lms.djangoapps.lms_xblock.mixin import LmsBlockMixin
 
@@ -416,6 +417,9 @@
 
     # The block types to disable need to be specified in "x block disable config" in django admin.
     'ENABLE_DISABLING_XBLOCK_TYPES': True,
+
+    # Enable the max score cache to speed up grading
+    'ENABLE_MAX_SCORE_CACHE': True,
 }
 
 # Ignore static asset files on import which match this pattern
@@ -703,7 +707,7 @@
 # These are the Mixins that should be added to every XBlock.
 # This should be moved into an XBlock Runtime/Application object
 # once the responsibility of XBlock creation is moved out of modulestore - cpennington
-XBLOCK_MIXINS = (LmsBlockMixin, InheritanceMixin, XModuleMixin)
+XBLOCK_MIXINS = (LmsBlockMixin, InheritanceMixin, XModuleMixin, EditInfoMixin)
 
 # Allow any XBlock in the LMS
 XBLOCK_SELECT_FUNCTION = prefer_xmodules