417 lines
14 KiB
PHP
417 lines
14 KiB
PHP
<?php
|
|
/**
|
|
* Piwik - free/libre analytics platform
|
|
*
|
|
* @link http://piwik.org
|
|
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
|
*
|
|
*/
|
|
namespace Piwik\DataAccess;
|
|
|
|
use Piwik\Common;
|
|
use Piwik\Container\StaticContainer;
|
|
use Piwik\Db;
|
|
use Piwik\Plugin\Dimension\DimensionMetadataProvider;
|
|
use Piwik\Plugin\LogTablesProvider;
|
|
|
|
/**
|
|
* DAO that queries log tables.
|
|
*/
|
|
class RawLogDao
|
|
{
|
|
const DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME = 'tmp_log_actions_to_keep';
|
|
|
|
/**
|
|
* @var DimensionMetadataProvider
|
|
*/
|
|
private $dimensionMetadataProvider;
|
|
|
|
/**
|
|
* @var LogTablesProvider
|
|
*/
|
|
private $logTablesProvider;
|
|
|
|
/**
 * Creates the DAO. Any collaborator that is not supplied is resolved from StaticContainer.
 *
 * @param DimensionMetadataProvider|null $provider Provider that is asked for the action reference
 *                                                 columns of each table.
 * @param LogTablesProvider|null $logTablesProvider Provider that is asked for all known log tables.
 */
public function __construct(DimensionMetadataProvider $provider = null, LogTablesProvider $logTablesProvider = null)
{
    if (!$provider) {
        $provider = StaticContainer::get('Piwik\Plugin\Dimension\DimensionMetadataProvider');
    }
    $this->dimensionMetadataProvider = $provider;

    if (!$logTablesProvider) {
        $logTablesProvider = StaticContainer::get('Piwik\Plugin\LogTablesProvider');
    }
    $this->logTablesProvider = $logTablesProvider;
}
|
|
|
|
/**
 * Updates columns of the log_visit rows that belong to a single visit.
 *
 * @param array $values Map of column name => new value. The keys form the SET clause, the values are
 *                      bound in the same order.
 * @param string $idVisit The idvisit matched by the WHERE clause.
 */
public function updateVisits(array $values, $idVisit)
{
    $table = Common::prefixTable('log_visit');
    $assignments = $this->getColumnSetExpressions(array_keys($values));

    $this->update("UPDATE " . $table . " SET " . $assignments . " WHERE idvisit = ?", $values, $idVisit);
}
|
|
|
|
/**
 * Updates columns of the log_conversion rows that belong to a single visit.
 *
 * @param array $values Map of column name => new value. The keys form the SET clause, the values are
 *                      bound in the same order.
 * @param string $idVisit The idvisit matched by the WHERE clause.
 */
public function updateConversions(array $values, $idVisit)
{
    $table = Common::prefixTable('log_conversion');
    $assignments = $this->getColumnSetExpressions(array_keys($values));

    $this->update("UPDATE " . $table . " SET " . $assignments . " WHERE idvisit = ?", $values, $idVisit);
}
|
|
|
|
/**
 * Counts the log_visit rows whose last action time lies in the half-open range [$from, $to).
 *
 * @param string $from Inclusive lower bound compared against visit_last_action_time.
 * @param string $to Exclusive upper bound compared against visit_last_action_time.
 * @return int
 */
public function countVisitsWithDatesLimit($from, $to)
{
    $visitTable = Common::prefixTable('log_visit');

    $count = Db::fetchOne(
        "SELECT COUNT(*) AS num_rows"
        . " FROM " . $visitTable
        . " WHERE visit_last_action_time >= ? AND visit_last_action_time < ?",
        array($from, $to)
    );

    return (int) $count;
}
|
|
|
|
/**
 * Walks over the rows of a log table in chunks, handing every non-empty chunk to a callback. The
 * parameters are kept as backend agnostic as possible w/o dramatically increasing code complexity.
 *
 * Rows are read in ascending order of the table's ID column; each query continues after the ID of the
 * last row of the previous chunk. Iteration stops as soon as a chunk holds fewer rows than
 * `$iterationStep`.
 *
 * @param string $logTable The log table name. Unprefixed, eg, `log_visit`. Must be a table known to
 *                         the log tables provider, otherwise an \InvalidArgumentException is thrown.
 * @param string[] $fields The columns to select. The ID column of the table is read from the last row
 *                         of every chunk, so it has to be part of this list.
 * @param array[] $conditions The conditions rows must match, ie, the WHERE part of the SELECT. Every
 *                            entry holds three elements:
 *
 *                            * the column name
 *                            * the operator (ie, '=', '<>', '<', etc.)
 *                            * the operand (ie, a value)
 *
 *                            All entries are AND-ed together.
 *
 *                            Example:
 *
 *                            ```
 *                            array(
 *                                array('visit_first_action_time', '>=', ...),
 *                                array('visit_first_action_time', '<', ...)
 *                            )
 *                            ```
 * @param int $iterationStep The number of rows to query at a time.
 * @param callable $callback Invoked with the array of rows of each non-empty chunk.
 */
public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback)
{
    $idField = $this->getIdFieldForLogTable($logTable);

    $iteration = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep);
    $query = $iteration[0];
    $bind = $iteration[1];

    $lastId = 0;
    while (true) {
        // the first placeholder of the query is always the "id greater than" cursor
        $rows = Db::fetchAll($query, array_merge(array($lastId), $bind));

        if (!empty($rows)) {
            $lastId = $rows[count($rows) - 1][$idField];

            $callback($rows);
        }

        // a chunk that is not completely filled means there is nothing left to read
        if (count($rows) != $iterationStep) {
            break;
        }
    }
}
|
|
|
|
/**
 * Deletes the rows that belong to the supplied visit IDs from a log table. This method does not
 * cascade, so rows in other tables that reference the same visits (eg, conversion items when deleting
 * from log_conversion) will not be deleted.
 *
 * @param string $tableName The unprefixed name of a log table that has an idvisit column, eg
 *                          `log_conversion`. The name is placed into the query as is, so it must not
 *                          come from untrusted input.
 * @param int[] $visitIds The visit IDs to delete rows for; every entry is cast to int.
 * @return int The number of deleted rows. 0 if no visit IDs were supplied.
 */
public function deleteFromLogTable($tableName, $visitIds)
{
    // an empty list would result in `idvisit IN ()`, which is not valid SQL
    if (empty($visitIds)) {
        return 0;
    }

    $sql = "DELETE FROM `" . Common::prefixTable($tableName) . "` WHERE idvisit IN "
         . $this->getInFieldExpressionWithInts($visitIds);

    $statement = Db::query($sql);
    return $statement->rowCount();
}
|
|
|
|
/**
 * Deletes conversion items for the supplied visit IDs from log_conversion_item.
 *
 * @param int[] $visitIds The visit IDs to delete conversion items for; every entry is cast to int.
 * @return int The number of deleted rows. 0 if no visit IDs were supplied.
 */
public function deleteConversionItems($visitIds)
{
    // an empty list would result in `idvisit IN ()`, which is not valid SQL
    if (empty($visitIds)) {
        return 0;
    }

    $sql = "DELETE FROM `" . Common::prefixTable('log_conversion_item') . "` WHERE idvisit IN "
         . $this->getInFieldExpressionWithInts($visitIds);

    $statement = Db::query($sql);
    return $statement->rowCount();
}
|
|
|
|
/**
 * Deletes all unused entries from the log_action table. This method uses a temporary table to store used
 * actions, and then deletes rows from log_action that are not in this temporary table.
 *
 * Table locking is required to avoid concurrency issues. The tables are unlocked and the temporary
 * table is dropped again even when one of the intermediate steps fails; the failure is still
 * propagated to the caller.
 *
 * @throws \Exception If table locking permission is not granted to the current MySQL user, or if one
 *                    of the queries fails.
 */
public function deleteUnusedLogActions()
{
    if (!Db::isLockPrivilegeGranted()) {
        throw new \Exception("RawLogDao.deleteUnusedLogActions() requires table locking permission in order to complete without error.");
    }

    // get current max ID in log tables w/ idaction references.
    $maxIds = $this->getMaxIdsInLogTables();

    $this->createTempTableForStoringUsedActions();
    try {
        // do large insert (inserting everything before maxIds) w/o locking tables...
        $this->insertActionsToKeep($maxIds, true);

        // ... then do small insert w/ locked tables to minimize the amount of time tables are locked.
        $this->lockLogTables();
        try {
            $this->insertActionsToKeep($maxIds, false);

            // delete before unlocking tables so there's no chance a new log row that references an
            // unused action will be inserted.
            $this->deleteUnusedActions();
        } finally {
            // never leave the log tables locked, not even when the insert or delete failed
            Db::unlockAllTables();
        }
    } finally {
        // the temporary table must not outlive this call, otherwise creating it again would fail
        $this->dropTempTableForStoringUsedActions();
    }
}
|
|
|
|
/**
 * Checks whether a website received at least one visit whose last action lies strictly between the
 * two given date times.
 *
 * @param string $fromDateTime Exclusive lower bound compared against visit_last_action_time.
 * @param string $toDateTime Exclusive upper bound compared against visit_last_action_time.
 * @param int $idSite The ID of the website to look for visits of.
 * @return bool true if there are visits for this site between the given timeframe, false if not
 */
public function hasSiteVisitsBetweenTimeframe($fromDateTime, $toDateTime, $idSite)
{
    $sql = "SELECT 1
              FROM " . Common::prefixTable('log_visit') . "
             WHERE idsite = ?
               AND visit_last_action_time > ?
               AND visit_last_action_time < ?
             LIMIT 1";
    $bind = array($idSite, $fromDateTime, $toDateTime);

    $hasVisit = Db::fetchOne($sql, $bind);

    return (bool) $hasVisit;
}
|
|
|
|
/**
 * Builds the body of a SET clause with one `column = ?` placeholder assignment per column.
 *
 * @param array $columnsToSet The column names, in the order their values will be bound.
 * @return string The assignments joined by ', ', eg `a = ?, b = ?`.
 */
protected function getColumnSetExpressions(array $columnsToSet)
{
    $assignments = array();
    foreach ($columnsToSet as $column) {
        $assignments[] = $column . ' = ?';
    }

    return implode(', ', $assignments);
}
|
|
|
|
/**
 * Executes an UPDATE statement, binding the new column values followed by the visit ID.
 *
 * @param string $sql The statement; its placeholders must be the SET values first and the idvisit last.
 * @param array $values The new column values. Only the values are bound, the keys are ignored.
 * @param string $idVisit Bound to the last placeholder.
 * @return \Zend_Db_Statement
 * @throws \Exception
 */
protected function update($sql, array $values, $idVisit)
{
    $bind = array_values($values);
    $bind[] = $idVisit;

    return Db::query($sql, $bind);
}
|
|
|
|
/**
 * Looks up the ID column of a log table.
 *
 * @param string $logTable The unprefixed log table name.
 * @return string The name of the table's ID column.
 * @throws \InvalidArgumentException If no ID column is known for the table.
 */
protected function getIdFieldForLogTable($logTable)
{
    $idColumns = $this->getTableIdColumns();

    if (!isset($idColumns[$logTable])) {
        throw new \InvalidArgumentException("Unknown log table '$logTable'.");
    }

    return $idColumns[$logTable];
}
|
|
|
|
// TODO: instead of creating a log query like this, we should re-use segments. to do this, however, there must be a 1-1
//       mapping for dimensions => segments, and each dimension should automatically have a segment.
/**
 * Builds the SELECT used to read one chunk of a log table, ordered by the ID column.
 *
 * The first placeholder of the returned query is the "ID greater than" cursor; its value is NOT part
 * of the returned bind array and has to be prepended by the caller.
 *
 * Column names and operators are placed into the query as is. When the operand of a condition is an
 * array, the condition becomes `column IN (...)` and its operator is ignored.
 *
 * @param string $logTable The unprefixed log table name.
 * @param string $idField The ID column of the table.
 * @param string[] $fields The columns to select.
 * @param array[] $conditions List of array(column, operator, operand) entries that are AND-ed together.
 * @param int $iterationStep The maximum number of rows per chunk, used as LIMIT.
 * @return array array($sql, $bind) where $bind holds the values of the condition placeholders.
 */
private function createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep)
{
    $sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE $idField > ?";
    $bind = array();

    foreach ($conditions as $condition) {
        list($column, $operator, $value) = $condition;

        if (!is_array($value)) {
            $sql .= " AND $column $operator ?";
            $bind[] = $value;
            continue;
        }

        // a list of operands always translates to an IN expression
        $sql .= " AND $column IN (" . Common::getSqlStringFieldsArray($value) . ")";
        $bind = array_merge($bind, $value);
    }

    $sql .= " ORDER BY $idField ASC LIMIT " . (int)$iterationStep;

    return array($sql, $bind);
}
|
|
|
|
/**
 * Builds the parenthesised, comma separated operand list of an IN expression. Every entry is cast to
 * int, so the result can be placed into a query directly.
 *
 * @param int[] $idVisits
 * @return string eg `(1, 2, 3)`, or `()` if no IDs were supplied.
 */
private function getInFieldExpressionWithInts($idVisits)
{
    $ints = array();
    foreach ($idVisits as $idVisit) {
        $ints[] = (int)$idVisit;
    }

    return "(" . implode(', ', $ints) . ")";
}
|
|
|
|
/**
 * Queries the current maximum value of the ID column of every log table that has one.
 *
 * @return array Map of unprefixed table name => result of `SELECT MAX(idcolumn)` for that table.
 */
protected function getMaxIdsInLogTables()
{
    $maxIds = array();

    foreach ($this->getTableIdColumns() as $table => $idCol) {
        $maxIds[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
    }

    return $maxIds;
}
|
|
|
|
/**
 * Creates the temporary table that collects the IDs of all actions that are still in use. It has a
 * single idaction column which is also its primary key.
 */
private function createTempTableForStoringUsedActions()
{
    $tempTable = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);

    Db::query("CREATE TEMPORARY TABLE " . $tempTable . " (
                    idaction INT(11),
                    PRIMARY KEY (idaction)
                )");
}
|
|
|
|
/**
 * Drops the temporary table that collected the IDs of the actions to keep.
 */
private function dropTempTableForStoringUsedActions()
{
    $tempTable = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);

    Db::query("DROP TABLE " . $tempTable);
}
|
|
|
|
/**
 * Copies the action IDs that are referenced by log rows into the temporary "actions to keep" table.
 *
 * For every table that has action reference columns, the rows are read in ID ranges of
 * `$insertIntoTempIterationStep` and all numeric values of the action reference columns are inserted
 * (INSERT IGNORE, so duplicates are skipped) in batches of roughly 1000 values.
 *
 * Protected for testing purposes.
 *
 * @param array $maxIds Map of unprefixed table name => max ID, as returned by getMaxIdsInLogTables().
 * @param bool $olderThan If true, rows from the table's current minimum ID up to the ID in `$maxIds`
 *                        are processed. If false, rows from the ID in `$maxIds` up to the table's
 *                        current maximum ID are processed.
 * @param int $insertIntoTempIterationStep The size of the ID range that is read with one SELECT.
 */
protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000)
{
    $tempTableName = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);

    $idColumns = $this->getTableIdColumns();
    foreach ($this->dimensionMetadataProvider->getActionReferenceColumnsByTable() as $table => $columns) {
        $idCol = $idColumns[$table];
        $prefixedTable = Common::prefixTable($table);

        // Create select query for requesting ALL needed fields at once
        $sql = "SELECT " . implode(',', $columns) . " FROM " . $prefixedTable . " WHERE $idCol >= ? AND $idCol < ?";

        // MIN()/MAX() are NULL for an empty table. All bounds are cast to int, since a NULL lower bound
        // would turn the range condition into `id >= NULL`, which never matches any row.
        if ($olderThan) {
            // Start at the lowest existing ID rather than zero: on long running installs starting at
            // zero results in thousands of queries that find nothing.
            $start = (int) Db::fetchOne("SELECT MIN($idCol) FROM " . $prefixedTable);
            $finish = (int) $maxIds[$table];
        } else {
            $start = (int) $maxIds[$table];
            $finish = (int) Db::fetchOne("SELECT MAX($idCol) FROM " . $prefixedTable);
        }

        // Borrowed from Db::segmentedFetchAll: request the rows of one ID range at a time
        for ($i = $start; $i <= $finish; $i += $insertIntoTempIterationStep) {
            $rows = Db::fetchAll($sql, array($i, $i + $insertIntoTempIterationStep));

            $keepValues = array();
            foreach ($rows as $row) {
                // is_numeric drops NULL references and guarantees only numbers end up in the query
                $keepValues = array_merge($keepValues, array_filter(array_values($row), "is_numeric"));

                if (count($keepValues) >= 1000) {
                    $this->insertIntoUsedActionsTempTable($tempTableName, $keepValues);
                    $keepValues = array();
                }
            }

            // flush whatever is left of this range
            $this->insertIntoUsedActionsTempTable($tempTableName, $keepValues);
        }
    }
}

/**
 * Inserts action IDs into the temporary table with one multi-row statement of the form
 * `INSERT IGNORE INTO [temp_table] VALUES (X),(Y),(Z)`. Does nothing if there are no IDs.
 *
 * @param string $tempTableName The prefixed name of the temporary table.
 * @param array $idActions Numeric action IDs; they are placed into the query as is.
 */
private function insertIntoUsedActionsTempTable($tempTableName, array $idActions)
{
    if (empty($idActions)) {
        return;
    }

    $insert = 'INSERT IGNORE INTO ' . $tempTableName . ' VALUES (' . implode('),(', $idActions) . ')';

    Db::exec($insert);
}
|
|
|
|
/**
 * Locks the log tables: every table that has an ID column is read locked, except for log_action which
 * is write locked.
 */
private function lockLogTables()
{
    $tables = $this->getTableIdColumns();
    unset($tables['log_action']); // we write lock it

    $readLocks = array();
    foreach ($tables as $tableName => $idColumn) {
        $readLocks[] = Common::prefixTable($tableName);
    }

    $writeLocks = Common::prefixTables('log_action');

    Db::lockTables($readLocks, $writeLocks);
}
|
|
|
|
/**
 * Deletes every log_action row whose idaction has no matching row in the temporary table of actions
 * to keep.
 */
private function deleteUnusedActions()
{
    $prefixedTables = Common::prefixTables("log_action", self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
    $logActionTable = $prefixedTables[0];
    $tempTableName = $prefixedTables[1];

    // the LEFT JOIN yields a NULL tmp.idaction exactly for the actions that are not in the temp table
    $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE {$logActionTable}
                    FROM {$logActionTable}
               LEFT JOIN {$tempTableName} tmp ON tmp.idaction = {$logActionTable}.idaction
                   WHERE tmp.idaction IS NULL";

    Db::query($deleteSql);
}
|
|
|
|
/**
 * Collects the ID column of every log table reported by the log tables provider. Tables without an ID
 * column are left out.
 *
 * @return array Map of log table name (as returned by the table's getName()) => ID column name.
 */
protected function getTableIdColumns()
{
    $idColumnsByTable = array();

    foreach ($this->logTablesProvider->getAllLogTables() as $logTable) {
        $idColumn = $logTable->getIdColumn();

        if (empty($idColumn)) {
            continue;
        }

        $idColumnsByTable[$logTable->getName()] = $idColumn;
    }

    return $idColumnsByTable;
}
|
|
}
|