Skip to content

Commit

Permalink
MDL-53758 search: Better results with low hit rates, improve performance
Browse files Browse the repository at this point in the history
Ensures that Solr will return available results, even if there are many
misses when using check_access(), by asking Solr for more results if the
counter says there should be more.

Improves performance by ending processing as soon as the requested page
of results is processed. Remaining number of pages is an "estimate"
based on the total result count from Solr and how many items we have
rejected up to this point.
  • Loading branch information
ericmerrill committed Apr 18, 2016
1 parent b611ade commit 053118a
Show file tree
Hide file tree
Showing 10 changed files with 827 additions and 202 deletions.
20 changes: 19 additions & 1 deletion search/classes/engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,20 @@ public function get_query_error() {
return $this->queryerror;
}

/**
* Returns the total number of documents available for the most recent call to execute_query.
*
* This can be an estimate, but should get more accurate the higher the limited passed to execute_query is.
* To do that, the engine can use (actual result returned count + count of unchecked documents), or
* (total possible docs - docs that have been checked and rejected).
*
* Engine can limit to manager::MAX_RESULTS if there is cost to determining more.
* If this cannot be computed in a reasonable way, manager::MAX_RESULTS may be returned.
*
* @return int
*/
abstract public function get_query_total_count();

/**
* Return true if file indexing is supported and enabled. False otherwise.
*
Expand Down Expand Up @@ -354,12 +368,16 @@ abstract function add_document($document, $fileindexing = false);
*
* Implementations of this function should check user context array to limit the results to contexts where the
* user have access. They should also limit the owneruserid field to manger::NO_OWNER_ID or the current user's id.
* Engines must use area->check_access() to confirm user access.
*
* Engines should reasonably attempt to fill up to limit with valid results if they are available.
*
* @param stdClass $filters Query and filters to apply.
* @param array $usercontexts Contexts where the user has access. True if the user can access all contexts.
* @param int $limit The maximum number of results to return. If empty, limit to manager::MAX_RESULTS.
* @return \core_search\document[] Results or false if no results
*/
abstract function execute_query($filters, $usercontexts);
abstract function execute_query($filters, $usercontexts, $limit = 0);

/**
* Delete all documents.
Expand Down
60 changes: 58 additions & 2 deletions search/classes/manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,61 @@ protected function get_areas_user_accesses() {
return $areascontexts;
}

/**
* Returns requested page of documents plus additional information for paging.
*
* This function does not perform any kind of security checking for access, the caller code
* should check that the current user have moodle/search:query capability.
*
* If a page is requested that is beyond the last result, the last valid page is returned in
* results, and actualpage indicates which page was returned.
*
* @param stdClass $formdata
* @param int $pagenum The 0 based page number.
* @return object An object with 3 properties:
* results => An array of \core_search\documents for the actual page.
* totalcount => Number of records that are possibly available, to base paging on.
* actualpage => The actual page returned.
*/
public function paged_search(\stdClass $formdata, $pagenum) {
$out = new \stdClass();

$perpage = static::DISPLAY_RESULTS_PER_PAGE;

// Make sure we only allow request up to max page.
$pagenum = min($pagenum, (static::MAX_RESULTS / $perpage) - 1);

// Calculate the first and last document number for the current page, 1 based.
$mindoc = ($pagenum * $perpage) + 1;
$maxdoc = ($pagenum + 1) * $perpage;

// Get engine documents, up to max.
$docs = $this->search($formdata, $maxdoc);

$resultcount = count($docs);
if ($resultcount < $maxdoc) {
// This means it couldn't give us results to max, so the count must be the max.
$out->totalcount = $resultcount;
} else {
// Get the possible count reported by engine, and limit to our max.
$out->totalcount = $this->engine->get_query_total_count();
$out->totalcount = min($out->totalcount, static::MAX_RESULTS);
}

// Determine the actual page.
if ($resultcount < $mindoc) {
// We couldn't get the min docs for this page, so determine what page we can get.
$out->actualpage = floor(($resultcount - 1) / $perpage);
} else {
$out->actualpage = $pagenum;
}

// Split the results to only return the page.
$out->results = array_slice($docs, $out->actualpage * $perpage, $perpage, true);

return $out;
}

/**
* Returns documents from the engine based on the data provided.
*
Expand All @@ -397,9 +452,10 @@ protected function get_areas_user_accesses() {
* It might return the results from the cache instead.
*
* @param stdClass $formdata
* @param int $limit The maximum number of documents to return
* @return \core_search\document[]
*/
public function search(\stdClass $formdata) {
public function search(\stdClass $formdata, $limit = 0) {
global $USER;

// Clears previous query errors.
Expand All @@ -410,7 +466,7 @@ public function search(\stdClass $formdata) {
// User can not access any context.
$docs = array();
} else {
$docs = $this->engine->execute_query($formdata, $areascontexts);
$docs = $this->engine->execute_query($formdata, $areascontexts, $limit);
}

return $docs;
Expand Down
12 changes: 6 additions & 6 deletions search/classes/output/renderer.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,26 +50,26 @@ class renderer extends \plugin_renderer_base {
* Renders search results.
*
* @param \core_search\document[] $results
* @param int $page
* @param int $page Zero based page number.
* @param int $totalcount Total number of results available.
* @param \moodle_url $url
* @return string HTML
*/
public function render_results($results, $page = 0, $url) {
public function render_results($results, $page, $totalcount, $url) {

// Paging bar.
$perpage = \core_search\manager::DISPLAY_RESULTS_PER_PAGE;
$content = $this->output->paging_bar(count($results), $page, $perpage, $url);
$content = $this->output->paging_bar($totalcount, $page, $perpage, $url);

// Results.
$resultshtml = array();
$hits = array_slice($results, $page * $perpage, $perpage, true);
foreach ($hits as $hit) {
foreach ($results as $hit) {
$resultshtml[] = $this->render_result($hit);
}
$content .= \html_writer::tag('div', implode('<hr/>', $resultshtml), array('class' => 'search-results'));

// Paging bar.
$content .= $this->output->paging_bar(count($results), $page, $perpage, $url);
$content .= $this->output->paging_bar($totalcount, $page, $perpage, $url);

return $content;
}
Expand Down
Loading

0 comments on commit 053118a

Please sign in to comment.