From 8ab4432ff79badb2cf750001dc281158a3f5814c Mon Sep 17 00:00:00 2001
From: Jared Hancock <jared@osticket.com>
Date: Fri, 25 Mar 2016 12:58:49 -0500
Subject: [PATCH] orm: Use a new iteration model not requiring caching

This adds a new concept to the ORM iteration mechanisms and allows iterating
over very large queries without caching the records in either the result
set or the model cache. It also implies using the mysqli_query() method
rather than the prepared statement model, as unbuffered queries with the
prepared statement system are much slower.
---
 include/class.export.php |   1 +
 include/class.orm.php    | 391 ++++++++++++++++++++++++++-------------
 include/mysqli.php       |  44 +++--
 3 files changed, 292 insertions(+), 144 deletions(-)

diff --git a/include/class.export.php b/include/class.export.php
index 9235a18bc..485628aa3 100644
--- a/include/class.export.php
+++ b/include/class.export.php
@@ -60,6 +60,7 @@ class Export {
         $tickets = $sql->models()
             ->select_related('user', 'user__default_email', 'dept', 'staff',
                 'team', 'staff', 'cdata', 'topic', 'status', 'cdata__:priority')
+            ->options(QuerySet::OPT_NOCACHE)
             ->annotate(array(
                 'collab_count' => TicketThread::objects()
                     ->filter(array('ticket__ticket_id' => new SqlField('ticket_id', 1)))
diff --git a/include/class.orm.php b/include/class.orm.php
index e0120814d..fc4c9b6c5 100644
--- a/include/class.orm.php
+++ b/include/class.orm.php
@@ -979,8 +979,16 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
     const ASC = 'ASC';
     const DESC = 'DESC';
 
+    const OPT_NOSORT    = 'nosort';
+    const OPT_NOCACHE   = 'nocache';
+
+    const ITER_MODELS   = 1;
+    const ITER_HASH     = 2;
+    const ITER_ROW      = 3;
+
+    var $iter = self::ITER_MODELS;
+
     var $compiler = 'MySqlCompiler';
-    var $iterator = 'ModelInstanceManager';
 
     var $query;
     var $count;
@@ -1103,7 +1111,7 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
     }
 
     function models() {
-        $this->iterator = 'ModelInstanceManager';
+        $this->iter = self::ITER_MODELS;
         $this->values = $this->related = array();
         return $this;
     }
@@ -1111,7 +1119,7 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
     function values() {
         foreach (func_get_args() as $A)
             $this->values[$A] = $A;
-        $this->iterator = 'HashArrayIterator';
+        $this->iter = self::ITER_HASH;
         // This disables related models
         $this->related = false;
         return $this;
@@ -1119,7 +1127,7 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
 
     function values_flat() {
         $this->values = func_get_args();
-        $this->iterator = 'FlatArrayIterator';
+        $this->iter = self::ITER_ROW;
         // This disables related models
         $this->related = false;
         return $this;
@@ -1130,7 +1138,7 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
     }
 
     function all() {
-        return $this->getIterator()->asArray();
+        return $this->getIterator();
     }
 
     function first() {
@@ -1174,7 +1182,7 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
         }
         $class = $this->compiler;
         $compiler = new $class();
-        return $this->_count = $compiler->compileCount($this);
+        return $this->count = $compiler->compileCount($this);
     }
 
     function toSql($compiler, $model, $alias=false) {
@@ -1241,10 +1249,18 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
     }
 
     function options($options) {
+        // Make an array with $options as the only key
+        if (!is_array($options))
+            $options = array($options => 1);
+
         $this->options = array_merge($this->options, $options);
         return $this;
     }
 
+    function hasOption($option) {
+        return isset($this->options[$option]);
+    }
+
     function countSelectFields() {
         $count = count($this->values) + count($this->annotations);
         if (isset($this->extra['select']))
@@ -1288,13 +1304,36 @@ class QuerySet implements IteratorAggregate, ArrayAccess, Serializable, Countabl
     }
 
     // IteratorAggregate interface
-    function getIterator() {
-        $class = $this->iterator;
-        if (!isset($this->_iterator))
-            $this->_iterator = new $class($this);
+    function getIterator($iterator=false) {
+        if (!isset($this->_iterator)) {
+            $class = $iterator ?: $this->getIteratorClass();
+            $it = new $class($this);
+            if (!$this->hasOption(self::OPT_NOCACHE)) {
+                if ($this->iter == self::ITER_MODELS)
+                    // Add findFirst() and such
+                    $it = new ModelResultSet($it);
+                else
+                    $it = new CachedResultSet($it);
+            }
+            else {
+                $it = $it->getIterator();
+            }
+            $this->_iterator = $it;
+        }
         return $this->_iterator;
     }
 
+    // Map the current iteration mode to the class which produces its
+    // records; yields NULL when the mode is not an ITER_* constant.
+    function getIteratorClass() {
+        if ($this->iter == self::ITER_MODELS)
+            return 'ModelInstanceManager';
+        if ($this->iter == self::ITER_HASH)
+            return 'HashArrayIterator';
+        if ($this->iter == self::ITER_ROW)
+            return 'FlatArrayIterator';
+    }
+
     // ArrayAccess interface
     function offsetExists($offset) {
         return $this->getIterator()->offsetExists($offset);
@@ -1391,78 +1430,124 @@ EOF;
 class DoesNotExist extends Exception {}
 class ObjectNotUnique extends Exception {}
 
-abstract class ResultSet implements Iterator, ArrayAccess, Countable {
-    var $resource;
-    var $position = 0;
-    var $queryset;
-    var $cache = array();
+class CachedResultSet
+implements IteratorAggregate, Countable, ArrayAccess {
+    protected $inner;
+    protected $eoi = false;
+    protected $cache = array();
 
-    function __construct($queryset=false) {
-        $this->queryset = $queryset;
-        if ($queryset) {
-            $this->model = $queryset->model;
-        }
+    function __construct(IteratorAggregate $iterator) {
+        $this->inner = $iterator->getIterator();
     }
 
-    function prime() {
-        if (!isset($this->resource) && $this->queryset)
-            $this->resource = $this->queryset->getQuery();
+    function fillTo($level) {
+        while (!$this->eoi && count($this->cache) < $level) {
+            if (!$this->inner->valid()) {
+                $this->eoi = true;
+                break;
+            }
+            $this->cache[] = $this->inner->current();
+            $this->inner->next();
+        }
     }
 
-    abstract function fillTo($index);
-
     function asArray() {
         $this->fillTo(PHP_INT_MAX);
-        return $this->cache;
+        return $this;
     }
 
-    // Iterator interface
-    function rewind() {
-        $this->position = 0;
-    }
-    function current() {
-        $this->fillTo($this->position);
-        return $this->cache[$this->position];
-    }
-    function key() {
-        return $this->position;
-    }
-    function next() {
-        $this->position++;
+    function getCache() {
+        return $this->cache;
     }
-    function valid() {
-        $this->fillTo($this->position);
-        return count($this->cache) > $this->position;
+
+    function getIterator() {
+        $this->asArray();
+        return new ArrayIterator($this->cache);
     }
 
-    // ArrayAccess interface
     function offsetExists($offset) {
-        $this->fillTo($offset);
-        return $this->position >= $offset;
+        $this->fillTo($offset+1);
+        return count($this->cache) > $offset;
     }
     function offsetGet($offset) {
-        $this->fillTo($offset);
+        $this->fillTo($offset+1);
         return $this->cache[$offset];
     }
     function offsetUnset($a) {
-        throw new Exception(sprintf(__('%s is read-only'), get_class($this)));
+        throw new Exception(__('QuerySet is read-only'));
     }
     function offsetSet($a, $b) {
-        throw new Exception(sprintf(__('%s is read-only'), get_class($this)));
+        throw new Exception(__('QuerySet is read-only'));
     }
 
-    // Countable interface
     function count() {
-        return count($this->asArray());
+        $this->asArray();
+        return count($this->cache);
     }
 }
 
-class ModelInstanceManager extends ResultSet {
+class ModelResultSet
+extends CachedResultSet {
+    /**
+     * Find the first item in the current set which matches the given criteria.
+     * This would be used in favor of ::filter() which might trigger another
+     * database query. The criteria is intended to be quite simple and should
+     * not traverse relationships which have not already been fetched.
+     * Otherwise, the ::filter() or ::window() methods would provide better
+     * performance. Returns NULL if no item matches.
+     *
+     * Example:
+     * >>> $a = new User();
+     * >>> $a->roles->add(Role::lookup(['name' => 'administrator']));
+     * >>> $a->roles->findFirst(['roles__name__startswith' => 'admin']);
+     * <Role: administrator>
+     */
+    function findFirst($criteria) {
+        $records = $this->findAll($criteria, 1);
+        return isset($records[0]) ? $records[0] : null;
+    }
+
+    /**
+     * Find all the items in the current set which match the given criteria,
+     * stopping after $limit matches if a limit is given. This would be used
+     * in favor of ::filter() which might trigger another database query.
+     * The criteria is intended to be quite simple and should not traverse
+     * relationships which have not already been fetched. Otherwise, the
+     * ::filter() or ::window() methods would provide better performance, as
+     * they can provide results with one more trip to the database.
+     */
+    function findAll($criteria, $limit=false) {
+        $records = array();
+        foreach ($this as $record) {
+            $matches = true;
+            foreach ($criteria as $field=>$check) {
+                if (!SqlCompiler::evaluate($record, $field, $check)) {
+                    $matches = false;
+                    break;
+                }
+            }
+            if ($matches)
+                $records[] = $record;
+            if ($limit && count($records) == $limit)
+                break;
+        }
+        return $records;
+    }
+}
+
+class ModelInstanceManager
+implements IteratorAggregate {
+    var $queryset;
     var $model;
     var $map;
 
     static $objectCache = array();
 
+    function __construct(QuerySet $queryset) {
+        $this->queryset = $queryset;
+        $this->model = $queryset->model;
+    }
+
     function cache($model) {
         $key = sprintf('%s.%s',
             $model::$meta->model, implode('.', $model->get('pk')));
@@ -1564,7 +1649,7 @@ class ModelInstanceManager extends ResultSet {
      * describes the relationship between the root model and this model,
      * 'user__account' for instance.
      */
-    function buildModel($row) {
+    function buildModel($row, $cache=true) {
         // TODO: Traverse to foreign keys
         if ($this->map) {
             if ($this->model != $this->map[0][1])
@@ -1577,7 +1662,7 @@ class ModelInstanceManager extends ResultSet {
                 $record = array_combine($fields, $values);
                 if (!$path) {
                     // Build the root model
-                    $model = $this->getOrBuild($this->model, $record);
+                    $model = $this->getOrBuild($this->model, $record, $cache);
                 }
                 elseif ($model) {
                     $i = 0;
@@ -1588,71 +1673,137 @@ class ModelInstanceManager extends ResultSet {
                         if (!($m = $m->get($field)))
                             break;
                     }
-                    if ($m)
-                        $m->set($tail, $this->getOrBuild($model_class, $record));
+                    if ($m) {
+                        // Only apply cache setting to the root model.
+                        // Reference models should use caching
+                        $m->set($tail, $this->getOrBuild($model_class, $record, $cache));
+                    }
                 }
                 $offset += count($fields);
             }
         }
         else {
-            $model = $this->getOrBuild($this->model, $row);
+            $model = $this->getOrBuild($this->model, $row, $cache);
         }
         return $model;
     }
 
-    function fillTo($index) {
-        $this->prime();
+    function getIterator() {
+        $this->resource = $this->queryset->getQuery();
+        $this->map = $this->resource->getMap();
+        $cache = !$this->queryset->hasOption(QuerySet::OPT_NOCACHE);
+        $this->resource->setBuffered($cache);
         $func = ($this->map) ? 'getRow' : 'getArray';
-        while ($this->resource && $index >= count($this->cache)) {
-            if ($row = $this->resource->{$func}()) {
-                $this->cache[] = $this->buildModel($row);
-            } else {
-                $this->resource->close();
-                $this->resource = false;
-                break;
-            }
-        }
+        $func = array($this->resource, $func);
+
+        return new CallbackSimpleIterator(function() use ($func, $cache) {
+            global $StopIteration;
+
+            if ($row = $func())
+                return $this->buildModel($row, $cache);
+
+            $this->resource->close();
+            throw $StopIteration;
+        });
+    }
+}
+
+class CallbackSimpleIterator
+implements Iterator {
+    var $current;
+    var $eoi;
+    var $callback;
+    var $key = -1;
+
+    function __construct($callback) {
+        assert(is_callable($callback));
+        $this->callback = $callback;
     }
 
-    function prime() {
-        parent::prime();
-        if ($this->resource) {
-            $this->map = $this->resource->getMap();
+    function rewind() {
+        $this->eoi = false;
+        $this->next();
+    }
+
+    function key() {
+        return $this->key;
+    }
+
+    function valid() {
+        if (!isset($this->eoi))
+            $this->rewind();
+        return !$this->eoi;
+    }
+
+    function current() {
+        if ($this->eoi) return false;
+        return $this->current;
+    }
+
+    function next() {
+        try {
+            $cbk = $this->callback;
+            $this->current = $cbk();
+            $this->key++;
+        }
+        catch (StopIteration $x) {
+            $this->eoi = true;
         }
     }
 }
 
-class FlatArrayIterator extends ResultSet {
-    function fillTo($index) {
-        $this->prime();
-        while ($this->resource && $index >= count($this->cache)) {
-            if ($row = $this->resource->getRow()) {
-                $this->cache[] = $row;
-            } else {
-                $this->resource->close();
-                $this->resource = false;
-                break;
-            }
-        }
+// Reuse one global instance, as constructing exceptions is expensive; $GLOBALS keeps it global when included from a function
+class StopIteration extends Exception {}
+$GLOBALS['StopIteration'] = new StopIteration();
+
+class FlatArrayIterator
+implements IteratorAggregate {
+    var $queryset;
+    var $resource;
+
+    function __construct(QuerySet $queryset) {
+        $this->queryset = $queryset;
+    }
+
+    function getIterator() {
+        $this->resource = $this->queryset->getQuery();
+        return new CallbackSimpleIterator(function() {
+            global $StopIteration;
+
+            if ($row = $this->resource->getRow())
+                return $row;
+
+            $this->resource->close();
+            throw $StopIteration;
+        });
     }
 }
 
-class HashArrayIterator extends ResultSet {
-    function fillTo($index) {
-        $this->prime();
-        while ($this->resource && $index >= count($this->cache)) {
-            if ($row = $this->resource->getArray()) {
-                $this->cache[] = $row;
-            } else {
-                $this->resource->close();
-                $this->resource = false;
-                break;
-            }
-        }
+class HashArrayIterator
+implements IteratorAggregate {
+    var $queryset;
+    var $resource;
+
+    function __construct(QuerySet $queryset) {
+        $this->queryset = $queryset;
+    }
+
+    function getIterator() {
+        $this->resource = $this->queryset->getQuery();
+        return new CallbackSimpleIterator(function() {
+            global $StopIteration;
+
+            if ($row = $this->resource->getArray())
+                return $row;
+
+            $this->resource->close();
+            throw $StopIteration;
+        });
     }
 }
 
-class InstrumentedList extends ModelInstanceManager {
+class InstrumentedList
+extends ModelResultSet {
     var $key;
 
     function __construct($fkey, $queryset=false) {
@@ -1662,8 +1813,9 @@ class InstrumentedList extends ModelInstanceManager {
             if ($related = $model::getMeta('select_related'))
                 $queryset->select_related($related);
         }
-        parent::__construct($queryset);
+        parent::__construct(new ModelInstanceManager($queryset));
         $this->model = $model;
+        $this->queryset = $queryset;
     }
 
     function add($object, $at=false) {
@@ -1728,34 +1880,6 @@ class InstrumentedList extends ModelInstanceManager {
         return new static(array($this->model, $key), $this->filter($constraint));
     }
 
-    /**
-     * Find the first item in the current set which matches the given criteria.
-     * This would be used in favor of ::filter() which might trigger another
-     * database query. The criteria is intended to be quite simple and should
-     * not traverse relationships which have not already been fetched.
-     * Otherwise, the ::filter() or ::window() methods would provide better
-     * performance.
-     *
-     * Example:
-     * >>> $a = new User();
-     * >>> $a->roles->add(Role::lookup(['name' => 'administator']));
-     * >>> $a->roles->findFirst(['roles__name__startswith' => 'admin']);
-     * <Role: administrator>
-     */
-    function findFirst(array $criteria) {
-        foreach ($this as $record) {
-            $matches = true;
-            foreach ($criteria as $field=>$check) {
-                if (!SqlCompiler::evaluate($record, $field, $check)) {
-                    $matches = false;
-                    break;
-                }
-            }
-            if ($matches)
-                return $record;
-        }
-    }
-
     /**
      * Sort the instrumented list in place. This would be useful to change the
      * sorting order of the items in the list without fetching the list from
@@ -2784,6 +2908,9 @@ class MySqlExecutor {
     // queries
     var $map;
 
+    var $conn;
+    var $unbuffered = false;
+
     function __construct($sql, $params, $map=null) {
         $this->sql = $sql;
         $this->params = $params;
@@ -2794,6 +2921,14 @@ class MySqlExecutor {
         return $this->map;
     }
 
+    function setBuffered($buffered) {
+        $this->unbuffered = !$buffered;
+        if (!$buffered) {
+            // Execute this query in another session
+            $this->conn = Bootstrap::connect();
+        }
+    }
+
     function fixupParams() {
         $self = $this;
         $params = array();
@@ -2812,12 +2947,12 @@ class MySqlExecutor {
 
     function execute() {
         list($sql, $params) = $this->fixupParams();
-        if (!($this->stmt = db_prepare($sql)))
+        if (!($this->stmt = db_prepare($sql, $this->conn)))
             throw new InconsistentModelException(
-                'Unable to prepare query: '.db_error().' '.$sql);
+                'Unable to prepare query: '.db_error($this->conn).' '.$sql);
         if (count($params))
             $this->_bind($params);
-        if (!$this->stmt->execute() || ! $this->stmt->store_result()) {
+        if (!$this->stmt->execute() || !($this->unbuffered || $this->stmt->store_result())) {
             throw new OrmException('Unable to execute query: ' . $this->stmt->error);
         }
         return true;
diff --git a/include/mysqli.php b/include/mysqli.php
index 2a79feaa7..4ab5e8cf3 100644
--- a/include/mysqli.php
+++ b/include/mysqli.php
@@ -69,9 +69,12 @@ function db_connect($host, $user, $passwd, $options = array()) {
     if(isset($options['db'])) $__db->select_db($options['db']);
 
     //set desired encoding just in case mysql charset is not UTF-8 - Thanks to FreshMedia
-    @$__db->query('SET NAMES "utf8"');
-    @$__db->query('SET CHARACTER SET "utf8"');
-    @$__db->query('SET COLLATION_CONNECTION=utf8_general_ci');
+    @db_set_all(array(
+        'NAMES'                 => 'utf8',
+        'CHARACTER SET'         => 'utf8',
+        'COLLATION_CONNECTION'  => 'utf8_general_ci',
+        'SQL_MODE'              => '',
+    ), 'session');
     $__db->set_charset('utf8');
 
     @db_set_variable('sql_mode', '');
@@ -123,10 +126,30 @@ function db_get_variable($variable, $type='session') {
 }
 
 function db_set_variable($variable, $value, $type='session') {
-    $sql =sprintf('SET %s %s=%s',strtoupper($type), $variable, db_input($value));
-    return db_query($sql);
+    return db_set_all(array($variable => $value), $type);
 }
 
+// Run a single SET statement assigning every name => value pair in
+// $variables; $type is the scope keyword ('session' or 'global').
+function db_set_all($variables, $type='session') {
+    global $__db;
+
+    $set = array();
+    $type = strtoupper($type);
+    foreach ($variables as $k=>$v) {
+        $k = strtoupper($k);
+        // MySQL doesn't support the session/global flag, and doesn't use
+        // an equal sign for these two. The flag is chosen per variable so
+        // they cannot strip it from the variables listed after them.
+        $bare = in_array($k, ['NAMES', 'CHARACTER SET'], true);
+        $T = $bare ? '' : $type;
+        if (!$bare)
+            $k .= ' = ';
+        $set[] = "$T $k ".($__db->real_escape_string($v) ?: "''");
+    }
+    $sql = 'SET ' . implode(', ', $set);
+    return db_query($sql);
+}
 
 function db_select_database($database) {
     global $__db;
@@ -194,17 +217,6 @@ function db_query_unbuffered($sql, $logError=false) {
     return db_query($sql, $logError, true);
 }
 
-function db_squery($query) { //smart db query...utilizing args and sprintf
-
-    $args  = func_get_args();
-    $query = array_shift($args);
-    $query = str_replace("?", "%s", $query);
-    $args  = array_map('db_real_escape', $args);
-    array_unshift($args, $query);
-    $query = call_user_func_array('sprintf', $args);
-    return db_query($query);
-}
-
 function db_count($query) {
     return db_result(db_query($query));
 }
-- 
GitLab