PDF rausgenommen

This commit is contained in:
aschwarz
2023-01-23 11:03:31 +01:00
parent 82d562a322
commit a6523903eb
28078 changed files with 4247552 additions and 2 deletions

View File

@ -0,0 +1,34 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\DataAccess;
use Piwik\Db;
use Piwik\Common;
/**
* Data Access Object for operations dealing with the log_action table.
*/
class Actions
{
/**
* Removes a list of actions from the log_action table by ID.
*
* @param int[] $idActions
*/
public function delete($idActions)
{
foreach ($idActions as &$id) {
$id = (int)$id;
}
$table = Common::prefixTable('log_action');
$sql = "DELETE FROM $table WHERE idaction IN (" . implode(",", $idActions) . ")";
Db::query($sql);
}
}

View File

@ -0,0 +1,371 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Exception;
use Piwik\Archive;
use Piwik\Archive\Chunk;
use Piwik\ArchiveProcessor;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Common;
use Piwik\Date;
use Piwik\Db;
use Piwik\Period;
use Piwik\Period\Range;
use Piwik\Segment;
/**
* Data Access object used to query archives
*
* A record in the Database for a given report is defined by
* - idarchive = unique ID that is associated to all the data of this archive (idsite+period+date)
* - idsite = the ID of the website
* - date1 = starting day of the period
* - date2 = ending day of the period
* - period = integer that defines the period (day/week/etc.). @see period::getId()
* - ts_archived = timestamp when the archive was processed (UTC)
* - name = the name of the report (ex: uniq_visitors or search_keywords_by_search_engines)
* - value = the actual data (a numeric value, or a blob of compressed serialized data)
*
*/
class ArchiveSelector
{
const NB_VISITS_RECORD_LOOKED_UP = "nb_visits";
const NB_VISITS_CONVERTED_RECORD_LOOKED_UP = "nb_visits_converted";
private static function getModel()
{
return new Model();
}
public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params, $minDatetimeArchiveProcessedUTC)
{
$idSite = $params->getSite()->getId();
$period = $params->getPeriod()->getId();
$dateStart = $params->getPeriod()->getDateStart();
$dateStartIso = $dateStart->toString('Y-m-d');
$dateEndIso = $params->getPeriod()->getDateEnd()->toString('Y-m-d');
$numericTable = ArchiveTableCreator::getNumericTable($dateStart);
$minDatetimeIsoArchiveProcessedUTC = null;
if ($minDatetimeArchiveProcessedUTC) {
$minDatetimeIsoArchiveProcessedUTC = Date::factory($minDatetimeArchiveProcessedUTC)->getDatetime();
}
$requestedPlugin = $params->getRequestedPlugin();
$segment = $params->getSegment();
$plugins = array("VisitsSummary", $requestedPlugin);
$doneFlags = Rules::getDoneFlags($plugins, $segment);
$doneFlagValues = Rules::getSelectableDoneFlagValues();
$results = self::getModel()->getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, $doneFlags, $doneFlagValues);
if (empty($results)) {
return false;
}
$idArchive = self::getMostRecentIdArchiveFromResults($segment, $requestedPlugin, $results);
$idArchiveVisitsSummary = self::getMostRecentIdArchiveFromResults($segment, "VisitsSummary", $results);
list($visits, $visitsConverted) = self::getVisitsMetricsFromResults($idArchive, $idArchiveVisitsSummary, $results);
if (false === $visits && false === $idArchive) {
return false;
}
return array($idArchive, $visits, $visitsConverted);
}
protected static function getVisitsMetricsFromResults($idArchive, $idArchiveVisitsSummary, $results)
{
$visits = $visitsConverted = false;
$archiveWithVisitsMetricsWasFound = ($idArchiveVisitsSummary !== false);
if ($archiveWithVisitsMetricsWasFound) {
$visits = $visitsConverted = 0;
}
foreach ($results as $result) {
if (in_array($result['idarchive'], array($idArchive, $idArchiveVisitsSummary))) {
$value = (int)$result['value'];
if (empty($visits)
&& $result['name'] == self::NB_VISITS_RECORD_LOOKED_UP
) {
$visits = $value;
}
if (empty($visitsConverted)
&& $result['name'] == self::NB_VISITS_CONVERTED_RECORD_LOOKED_UP
) {
$visitsConverted = $value;
}
}
}
return array($visits, $visitsConverted);
}
protected static function getMostRecentIdArchiveFromResults(Segment $segment, $requestedPlugin, $results)
{
$idArchive = false;
$namesRequestedPlugin = Rules::getDoneFlags(array($requestedPlugin), $segment);
foreach ($results as $result) {
if ($idArchive === false
&& in_array($result['name'], $namesRequestedPlugin)
) {
$idArchive = $result['idarchive'];
break;
}
}
return $idArchive;
}
/**
* Queries and returns archive IDs for a set of sites, periods, and a segment.
*
* @param array $siteIds
* @param array $periods
* @param Segment $segment
* @param array $plugins List of plugin names for which data is being requested.
* @return array Archive IDs are grouped by archive name and period range, ie,
* array(
* 'VisitsSummary.done' => array(
* '2010-01-01' => array(1,2,3)
* )
* )
* @throws
*/
public static function getArchiveIds($siteIds, $periods, $segment, $plugins)
{
if (empty($siteIds)) {
throw new \Exception("Website IDs could not be read from the request, ie. idSite=");
}
foreach ($siteIds as $index => $siteId) {
$siteIds[$index] = (int) $siteId;
}
$getArchiveIdsSql = "SELECT idsite, name, date1, date2, MAX(idarchive) as idarchive
FROM %s
WHERE idsite IN (" . implode(',', $siteIds) . ")
AND " . self::getNameCondition($plugins, $segment) . "
AND %s
GROUP BY idsite, date1, date2, name";
$monthToPeriods = array();
foreach ($periods as $period) {
/** @var Period $period */
if ($period->getDateStart()->isLater(Date::now()->addDay(2))) {
continue; // avoid creating any archive tables in the future
}
$table = ArchiveTableCreator::getNumericTable($period->getDateStart());
$monthToPeriods[$table][] = $period;
}
// for every month within the archive query, select from numeric table
$result = array();
foreach ($monthToPeriods as $table => $periods) {
$firstPeriod = reset($periods);
$bind = array();
if ($firstPeriod instanceof Range) {
$dateCondition = "date1 = ? AND date2 = ?";
$bind[] = $firstPeriod->getDateStart()->toString('Y-m-d');
$bind[] = $firstPeriod->getDateEnd()->toString('Y-m-d');
} else {
// we assume there is no range date in $periods
$dateCondition = '(';
foreach ($periods as $period) {
if (strlen($dateCondition) > 1) {
$dateCondition .= ' OR ';
}
$dateCondition .= "(period = ? AND date1 = ? AND date2 = ?)";
$bind[] = $period->getId();
$bind[] = $period->getDateStart()->toString('Y-m-d');
$bind[] = $period->getDateEnd()->toString('Y-m-d');
}
$dateCondition .= ')';
}
$sql = sprintf($getArchiveIdsSql, $table, $dateCondition);
$archiveIds = Db::fetchAll($sql, $bind);
// get the archive IDs
foreach ($archiveIds as $row) {
//FIXMEA duplicate with Archive.php
$dateStr = $row['date1'] . ',' . $row['date2'];
$result[$row['name']][$dateStr][] = $row['idarchive'];
}
}
return $result;
}
/**
* Queries and returns archive data using a set of archive IDs.
*
* @param array $archiveIds The IDs of the archives to get data from.
* @param array $recordNames The names of the data to retrieve (ie, nb_visits, nb_actions, etc.).
* Note: You CANNOT pass multiple recordnames if $loadAllSubtables=true.
* @param string $archiveDataType The archive data type (either, 'blob' or 'numeric').
* @param int|null|string $idSubtable null if the root blob should be loaded, an integer if a subtable should be
* loaded and 'all' if all subtables should be loaded.
* @throws Exception
* @return array
*/
public static function getArchiveData($archiveIds, $recordNames, $archiveDataType, $idSubtable)
{
$chunk = new Chunk();
// create the SQL to select archive data
$loadAllSubtables = $idSubtable == Archive::ID_SUBTABLE_LOAD_ALL_SUBTABLES;
if ($loadAllSubtables) {
$name = reset($recordNames);
// select blobs w/ name like "$name_[0-9]+" w/o using RLIKE
$nameEnd = strlen($name) + 1;
$nameEndAppendix = $nameEnd + 1;
$appendix = $chunk->getAppendix();
$lenAppendix = strlen($appendix);
$checkForChunkBlob = "SUBSTRING(name, $nameEnd, $lenAppendix) = '$appendix'";
$checkForSubtableId = "(SUBSTRING(name, $nameEndAppendix, 1) >= '0'
AND SUBSTRING(name, $nameEndAppendix, 1) <= '9')";
$whereNameIs = "(name = ? OR (name LIKE ? AND ( $checkForChunkBlob OR $checkForSubtableId ) ))";
$bind = array($name, $name . '%');
} else {
if ($idSubtable === null) {
// select root table or specific record names
$bind = array_values($recordNames);
} else {
// select a subtable id
$bind = array();
foreach ($recordNames as $recordName) {
// to be backwards compatibe we need to look for the exact idSubtable blob and for the chunk
// that stores the subtables (a chunk stores many blobs in one blob)
$bind[] = $chunk->getRecordNameForTableId($recordName, $idSubtable);
$bind[] = self::appendIdSubtable($recordName, $idSubtable);
}
}
$inNames = Common::getSqlStringFieldsArray($bind);
$whereNameIs = "name IN ($inNames)";
}
$getValuesSql = "SELECT value, name, idsite, date1, date2, ts_archived
FROM %s
WHERE idarchive IN (%s)
AND " . $whereNameIs;
// get data from every table we're querying
$rows = array();
foreach ($archiveIds as $period => $ids) {
if (empty($ids)) {
throw new Exception("Unexpected: id archive not found for period '$period' '");
}
// $period = "2009-01-04,2009-01-04",
$date = Date::factory(substr($period, 0, 10));
$isNumeric = $archiveDataType == 'numeric';
if ($isNumeric) {
$table = ArchiveTableCreator::getNumericTable($date);
} else {
$table = ArchiveTableCreator::getBlobTable($date);
}
$sql = sprintf($getValuesSql, $table, implode(',', $ids));
$dataRows = Db::fetchAll($sql, $bind);
foreach ($dataRows as $row) {
if ($isNumeric) {
$rows[] = $row;
} else {
$row['value'] = self::uncompress($row['value']);
if ($chunk->isRecordNameAChunk($row['name'])) {
self::moveChunkRowToRows($rows, $row, $chunk, $loadAllSubtables, $idSubtable);
} else {
$rows[] = $row;
}
}
}
}
return $rows;
}
private static function moveChunkRowToRows(&$rows, $row, Chunk $chunk, $loadAllSubtables, $idSubtable)
{
// $blobs = array([subtableID] = [blob of subtableId])
$blobs = Common::safe_unserialize($row['value']);
if (!is_array($blobs)) {
return;
}
// $rawName = eg 'PluginName_ArchiveName'
$rawName = $chunk->getRecordNameWithoutChunkAppendix($row['name']);
if ($loadAllSubtables) {
foreach ($blobs as $subtableId => $blob) {
$row['value'] = $blob;
$row['name'] = self::appendIdSubtable($rawName, $subtableId);
$rows[] = $row;
}
} elseif (array_key_exists($idSubtable, $blobs)) {
$row['value'] = $blobs[$idSubtable];
$row['name'] = self::appendIdSubtable($rawName, $idSubtable);
$rows[] = $row;
}
}
public static function appendIdSubtable($recordName, $id)
{
return $recordName . "_" . $id;
}
private static function uncompress($data)
{
return @gzuncompress($data);
}
/**
* Returns the SQL condition used to find successfully completed archives that
* this instance is querying for.
*
* @param array $plugins
* @param Segment $segment
* @return string
*/
private static function getNameCondition(array $plugins, Segment $segment)
{
// the flags used to tell how the archiving process for a specific archive was completed,
// if it was completed
$doneFlags = Rules::getDoneFlags($plugins, $segment);
$allDoneFlags = "'" . implode("','", $doneFlags) . "'";
$possibleValues = Rules::getSelectableDoneFlagValues();
// create the SQL to find archives that are DONE
return "((name IN ($allDoneFlags)) AND (value IN (" . implode(',', $possibleValues) . ")))";
}
}

View File

@ -0,0 +1,123 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Piwik\Common;
use Piwik\Date;
use Piwik\DbHelper;
class ArchiveTableCreator
{
const NUMERIC_TABLE = "numeric";
const BLOB_TABLE = "blob";
public static $tablesAlreadyInstalled = null;
public static function getNumericTable(Date $date)
{
return self::getTable($date, self::NUMERIC_TABLE);
}
public static function getBlobTable(Date $date)
{
return self::getTable($date, self::BLOB_TABLE);
}
protected static function getTable(Date $date, $type)
{
$tableNamePrefix = "archive_" . $type;
$tableName = $tableNamePrefix . "_" . self::getTableMonthFromDate($date);
$tableName = Common::prefixTable($tableName);
self::createArchiveTablesIfAbsent($tableName, $tableNamePrefix);
return $tableName;
}
protected static function createArchiveTablesIfAbsent($tableName, $tableNamePrefix)
{
if (is_null(self::$tablesAlreadyInstalled)) {
self::refreshTableList();
}
if (!in_array($tableName, self::$tablesAlreadyInstalled)) {
self::getModel()->createArchiveTable($tableName, $tableNamePrefix);
self::$tablesAlreadyInstalled[] = $tableName;
}
}
private static function getModel()
{
return new Model();
}
public static function clear()
{
self::$tablesAlreadyInstalled = null;
}
public static function refreshTableList($forceReload = false)
{
self::$tablesAlreadyInstalled = DbHelper::getTablesInstalled($forceReload);
}
/**
* Returns all table names archive_*
*
* @param string $type The type of table to return. Either `self::NUMERIC_TABLE` or `self::BLOB_TABLE`.
* @return array
*/
public static function getTablesArchivesInstalled($type = null)
{
if (is_null(self::$tablesAlreadyInstalled)) {
self::refreshTableList();
}
if (empty($type)) {
$tableMatchRegex = '/archive_(numeric|blob)_/';
} else {
$tableMatchRegex = '/archive_' . preg_quote($type) . '_/';
}
$archiveTables = array();
foreach (self::$tablesAlreadyInstalled as $table) {
if (preg_match($tableMatchRegex, $table)) {
$archiveTables[] = $table;
}
}
return $archiveTables;
}
public static function getDateFromTableName($tableName)
{
$tableName = Common::unprefixTable($tableName);
$date = str_replace(array('archive_numeric_', 'archive_blob_'), '', $tableName);
return $date;
}
public static function getTableMonthFromDate(Date $date)
{
return $date->toString('Y_m');
}
public static function getTypeFromTableName($tableName)
{
if (strpos($tableName, 'archive_numeric_') !== false) {
return self::NUMERIC_TABLE;
}
if (strpos($tableName, 'archive_blob_') !== false) {
return self::BLOB_TABLE;
}
return false;
}
}

View File

@ -0,0 +1,90 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\DataAccess;
use Piwik\Common;
use Piwik\Config;
use Piwik\Db;
/**
* Data Access class for querying numeric & blob archive tables.
*/
class ArchiveTableDao
{
/**
* Analyzes numeric & blob tables for a single table date (ie, `'2015_01'`) and returns
* statistics including:
*
* - number of archives present
* - number of invalidated archives
* - number of temporary archives
* - number of error archives
* - number of segment archives
* - number of numeric rows
* - number of blob rows
*
* @param string $tableDate ie `'2015_01'`
* @return array
*/
public function getArchiveTableAnalysis($tableDate)
{
$numericQueryEmptyRow = array(
'count_archives' => '-',
'count_invalidated_archives' => '-',
'count_temporary_archives' => '-',
'count_error_archives' => '-',
'count_segment_archives' => '-',
'count_numeric_rows' => '-',
);
$tableDate = str_replace("`", "", $tableDate); // for sanity
$numericTable = Common::prefixTable("archive_numeric_$tableDate");
$blobTable = Common::prefixTable("archive_blob_$tableDate");
// query numeric table
$sql = "SELECT CONCAT_WS('.', idsite, date1, date2, period) AS label,
SUM(CASE WHEN name LIKE 'done%' THEN 1 ELSE 0 END) AS count_archives,
SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_invalidated_archives,
SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_temporary_archives,
SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_error_archives,
SUM(CASE WHEN name LIKE 'done%' AND CHAR_LENGTH(name) > 32 THEN 1 ELSE 0 END) AS count_segment_archives,
SUM(CASE WHEN name NOT LIKE 'done%' THEN 1 ELSE 0 END) AS count_numeric_rows,
0 AS count_blob_rows
FROM `$numericTable`
GROUP BY idsite, date1, date2, period";
$rows = Db::fetchAll($sql, array(ArchiveWriter::DONE_INVALIDATED, ArchiveWriter::DONE_OK_TEMPORARY,
ArchiveWriter::DONE_ERROR));
// index result
$result = array();
foreach ($rows as $row) {
$result[$row['label']] = $row;
}
// query blob table & manually merge results (no FULL OUTER JOIN in mysql)
$sql = "SELECT CONCAT_WS('.', idsite, date1, date2, period) AS label,
COUNT(*) AS count_blob_rows,
SUM(OCTET_LENGTH(value)) AS sum_blob_length
FROM `$blobTable`
GROUP BY idsite, date1, date1, period";
foreach (Db::fetchAll($sql) as $blobStatsRow) {
$label = $blobStatsRow['label'];
if (isset($result[$label])) {
$result[$label] = array_merge($result[$label], $blobStatsRow);
} else {
$result[$label] = $blobStatsRow + $numericQueryEmptyRow;
}
}
return $result;
}
}

View File

@ -0,0 +1,276 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Exception;
use Piwik\Archive;
use Piwik\Archive\Chunk;
use Piwik\ArchiveProcessor\Rules;
use Piwik\ArchiveProcessor;
use Piwik\Db;
use Piwik\Db\BatchInsert;
use Piwik\Period;
/**
* This class is used to create a new Archive.
* An Archive is a set of reports (numeric and data tables).
* New data can be inserted in the archive with insertRecord/insertBulkRecords
*/
class ArchiveWriter
{
/**
* Flag stored at the end of the archiving
*
* @var int
*/
const DONE_OK = 1;
/**
* Flag stored at the start of the archiving
* When requesting an Archive, we make sure that non-finished archive are not considered valid
*
* @var int
*/
const DONE_ERROR = 2;
/**
* Flag indicates the archive is over a period that is not finished, eg. the current day, current week, etc.
* Archives flagged will be regularly purged from the DB.
*
* @var int
*/
const DONE_OK_TEMPORARY = 3;
/**
* Flag indicated that archive is done but was marked as invalid later and needs to be re-processed during next archiving process
*
* @var int
*/
const DONE_INVALIDATED = 4;
protected $fields = array('idarchive',
'idsite',
'date1',
'date2',
'period',
'ts_archived',
'name',
'value');
public function __construct(ArchiveProcessor\Parameters $params, $isArchiveTemporary)
{
$this->idArchive = false;
$this->idSite = $params->getSite()->getId();
$this->segment = $params->getSegment();
$this->period = $params->getPeriod();
$idSites = array($this->idSite);
$this->doneFlag = Rules::getDoneStringFlagFor($idSites, $this->segment, $this->period->getLabel(), $params->getRequestedPlugin());
$this->isArchiveTemporary = $isArchiveTemporary;
$this->dateStart = $this->period->getDateStart();
}
/**
* @param string $name
* @param string|string[] $values A blob string or an array of blob strings. If an array
* is used, the first element in the array will be inserted
* with the `$name` name. The others will be splitted into chunks. All subtables
* within one chunk will be serialized as an array where the index is the
* subtableId.
*/
public function insertBlobRecord($name, $values)
{
if (is_array($values)) {
$clean = array();
if (isset($values[0])) {
// we always store the root table in a single blob for fast access
$clean[] = array($name, $this->compress($values[0]));
unset($values[0]);
}
if (!empty($values)) {
// we move all subtables into chunks
$chunk = new Chunk();
$chunks = $chunk->moveArchiveBlobsIntoChunks($name, $values);
foreach ($chunks as $index => $subtables) {
$clean[] = array($index, $this->compress(serialize($subtables)));
}
}
$this->insertBulkRecords($clean);
return;
}
$values = $this->compress($values);
$this->insertRecord($name, $values);
}
public function getIdArchive()
{
if ($this->idArchive === false) {
throw new Exception("Must call allocateNewArchiveId() first");
}
return $this->idArchive;
}
public function initNewArchive()
{
$this->allocateNewArchiveId();
$this->logArchiveStatusAsIncomplete();
}
public function finalizeArchive()
{
$numericTable = $this->getTableNumeric();
$idArchive = $this->getIdArchive();
$this->getModel()->deletePreviousArchiveStatus($numericTable, $idArchive, $this->doneFlag);
$this->logArchiveStatusAsFinal();
}
protected function compress($data)
{
if (Db::get()->hasBlobDataType()) {
return gzcompress($data);
}
return $data;
}
protected function allocateNewArchiveId()
{
$numericTable = $this->getTableNumeric();
$this->idArchive = $this->getModel()->allocateNewArchiveId($numericTable);
return $this->idArchive;
}
private function getModel()
{
return new Model();
}
protected function logArchiveStatusAsIncomplete()
{
$this->insertRecord($this->doneFlag, self::DONE_ERROR);
}
protected function logArchiveStatusAsFinal()
{
$status = self::DONE_OK;
if ($this->isArchiveTemporary) {
$status = self::DONE_OK_TEMPORARY;
}
$this->insertRecord($this->doneFlag, $status);
}
protected function insertBulkRecords($records)
{
// Using standard plain INSERT if there is only one record to insert
if ($DEBUG_DO_NOT_USE_BULK_INSERT = false
|| count($records) == 1
) {
foreach ($records as $record) {
$this->insertRecord($record[0], $record[1]);
}
return true;
}
$bindSql = $this->getInsertRecordBind();
$values = array();
$valueSeen = false;
foreach ($records as $record) {
// don't record zero
if (empty($record[1])) {
continue;
}
$bind = $bindSql;
$bind[] = $record[0]; // name
$bind[] = $record[1]; // value
$values[] = $bind;
$valueSeen = $record[1];
}
if (empty($values)) {
return true;
}
$tableName = $this->getTableNameToInsert($valueSeen);
$fields = $this->getInsertFields();
BatchInsert::tableInsertBatch($tableName, $fields, $values, $throwException = false, $charset = 'latin1');
return true;
}
/**
* Inserts a record in the right table (either NUMERIC or BLOB)
*
* @param string $name
* @param mixed $value
*
* @return bool
*/
public function insertRecord($name, $value)
{
if ($this->isRecordZero($value)) {
return false;
}
$tableName = $this->getTableNameToInsert($value);
$fields = $this->getInsertFields();
$record = $this->getInsertRecordBind();
$this->getModel()->insertRecord($tableName, $fields, $record, $name, $value);
return true;
}
protected function getInsertRecordBind()
{
return array($this->getIdArchive(),
$this->idSite,
$this->dateStart->toString('Y-m-d'),
$this->period->getDateEnd()->toString('Y-m-d'),
$this->period->getId(),
date("Y-m-d H:i:s"));
}
protected function getTableNameToInsert($value)
{
if (is_numeric($value)) {
return $this->getTableNumeric();
}
return ArchiveTableCreator::getBlobTable($this->dateStart);
}
protected function getTableNumeric()
{
return ArchiveTableCreator::getNumericTable($this->dateStart);
}
protected function getInsertFields()
{
return $this->fields;
}
protected function isRecordZero($value)
{
return ($value === '0' || $value === false || $value === 0 || $value === 0.0);
}
}

View File

@ -0,0 +1,961 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Piwik\ArchiveProcessor\Parameters;
use Piwik\Common;
use Piwik\Container\StaticContainer;
use Piwik\DataArray;
use Piwik\Date;
use Piwik\Db;
use Piwik\Metrics;
use Piwik\Period;
use Piwik\Tracker\GoalManager;
use Psr\Log\LoggerInterface;
/**
* Contains methods that calculate metrics by aggregating log data (visits, actions, conversions,
* ecommerce items).
*
* You can use the methods in this class within {@link Piwik\Plugin\Archiver Archiver} descendants
* to aggregate log data without having to write SQL queries.
*
* ### Aggregation Dimension
*
* All aggregation methods accept a **dimension** parameter. These parameters are important as
* they control how rows in a table are aggregated together.
*
* A **_dimension_** is just a table column. Rows that have the same values for these columns are
* aggregated together. The result of these aggregations is a set of metrics for every recorded value
* of a **dimension**.
*
* _Note: A dimension is essentially the same as a **GROUP BY** field._
*
* ### Examples
*
* **Aggregating visit data**
*
* $archiveProcessor = // ...
* $logAggregator = $archiveProcessor->getLogAggregator();
*
* // get metrics for every used browser language of all visits by returning visitors
* $query = $logAggregator->queryVisitsByDimension(
* $dimensions = array('log_visit.location_browser_lang'),
* $where = 'log_visit.visitor_returning = 1',
*
* // also count visits for each browser language that are not located in the US
* $additionalSelects = array('sum(case when log_visit.location_country <> 'us' then 1 else 0 end) as nonus'),
*
* // we're only interested in visits, unique visitors & actions, so don't waste time calculating anything else
* $metrics = array(Metrics::INDEX_NB_UNIQ_VISITORS, Metrics::INDEX_NB_VISITS, Metrics::INDEX_NB_ACTIONS),
* );
* if ($query === false) {
* return;
* }
*
* while ($row = $query->fetch()) {
* $uniqueVisitors = $row[Metrics::INDEX_NB_UNIQ_VISITORS];
* $visits = $row[Metrics::INDEX_NB_VISITS];
* $actions = $row[Metrics::INDEX_NB_ACTIONS];
*
* // ... do something w/ calculated metrics ...
* }
*
* **Aggregating conversion data**
*
* $archiveProcessor = // ...
* $logAggregator = $archiveProcessor->getLogAggregator();
*
* // get metrics for ecommerce conversions for each country
* $query = $logAggregator->queryConversionsByDimension(
* $dimensions = array('log_conversion.location_country'),
* $where = 'log_conversion.idgoal = 0', // 0 is the special ecommerceOrder idGoal value in the table
*
* // also calculate average tax and max shipping per country
* $additionalSelects = array(
* 'AVG(log_conversion.revenue_tax) as avg_tax',
* 'MAX(log_conversion.revenue_shipping) as max_shipping'
* )
* );
* if ($query === false) {
* return;
* }
*
* while ($row = $query->fetch()) {
* $country = $row['location_country'];
* $numEcommerceSales = $row[Metrics::INDEX_GOAL_NB_CONVERSIONS];
* $numVisitsWithEcommerceSales = $row[Metrics::INDEX_GOAL_NB_VISITS_CONVERTED];
* $avgTaxForCountry = $row['avg_tax'];
* $maxShippingForCountry = $row['max_shipping'];
*
* // ... do something with aggregated data ...
* }
*/
class LogAggregator
{
const LOG_VISIT_TABLE = 'log_visit';
const LOG_ACTIONS_TABLE = 'log_link_visit_action';
const LOG_CONVERSION_TABLE = "log_conversion";
const REVENUE_SUBTOTAL_FIELD = 'revenue_subtotal';
const REVENUE_TAX_FIELD = 'revenue_tax';
const REVENUE_SHIPPING_FIELD = 'revenue_shipping';
const REVENUE_DISCOUNT_FIELD = 'revenue_discount';
const TOTAL_REVENUE_FIELD = 'revenue';
const ITEMS_COUNT_FIELD = "items";
const CONVERSION_DATETIME_FIELD = "server_time";
const ACTION_DATETIME_FIELD = "server_time";
const VISIT_DATETIME_FIELD = 'visit_last_action_time';
const IDGOAL_FIELD = 'idgoal';
const FIELDS_SEPARATOR = ", \n\t\t\t";
/** @var \Piwik\Date */
protected $dateStart;
/** @var \Piwik\Date */
protected $dateEnd;
/** @var int[] */
protected $sites;
/** @var \Piwik\Segment */
protected $segment;
/**
* @var string
*/
private $queryOriginHint = '';
/**
* @var LoggerInterface
*/
private $logger;
/**
* Constructor.
*
* @param \Piwik\ArchiveProcessor\Parameters $params
*/
public function __construct(Parameters $params, LoggerInterface $logger = null)
{
$this->dateStart = $params->getDateTimeStart();
$this->dateEnd = $params->getDateTimeEnd();
$this->segment = $params->getSegment();
$this->sites = $params->getIdSites();
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
}
public function getSegment()
{
return $this->segment;
}
public function setQueryOriginHint($nameOfOrigiin)
{
$this->queryOriginHint = $nameOfOrigiin;
}
public function generateQuery($select, $from, $where, $groupBy, $orderBy, $limit = 0, $offset = 0)
{
$bind = $this->getGeneralQueryBindParams();
$query = $this->segment->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy, $limit, $offset);
$select = 'SELECT';
if ($this->queryOriginHint && is_array($query) && 0 === strpos(trim($query['sql']), $select)) {
$query['sql'] = trim($query['sql']);
$query['sql'] = 'SELECT /* ' . $this->queryOriginHint . ' */' . substr($query['sql'], strlen($select));
}
// Log on DEBUG level all SQL archiving queries
$this->logger->debug($query['sql']);
return $query;
}
protected function getVisitsMetricFields()
{
return array(
Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_VISIT_TABLE . ".idvisitor)",
Metrics::INDEX_NB_UNIQ_FINGERPRINTS => "count(distinct " . self::LOG_VISIT_TABLE . ".config_id)",
Metrics::INDEX_NB_VISITS => "count(*)",
Metrics::INDEX_NB_ACTIONS => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
Metrics::INDEX_MAX_ACTIONS => "max(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
Metrics::INDEX_SUM_VISIT_LENGTH => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_time)",
Metrics::INDEX_BOUNCE_COUNT => "sum(case " . self::LOG_VISIT_TABLE . ".visit_total_actions when 1 then 1 when 0 then 1 else 0 end)",
Metrics::INDEX_NB_VISITS_CONVERTED => "sum(case " . self::LOG_VISIT_TABLE . ".visit_goal_converted when 1 then 1 else 0 end)",
Metrics::INDEX_NB_USERS => "count(distinct " . self::LOG_VISIT_TABLE . ".user_id)",
);
}
public static function getConversionsMetricFields()
{
return array(
Metrics::INDEX_GOAL_NB_CONVERSIONS => "count(*)",
Metrics::INDEX_GOAL_NB_VISITS_CONVERTED => "count(distinct " . self::LOG_CONVERSION_TABLE . ".idvisit)",
Metrics::INDEX_GOAL_REVENUE => self::getSqlConversionRevenueSum(self::TOTAL_REVENUE_FIELD),
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SUBTOTAL => self::getSqlConversionRevenueSum(self::REVENUE_SUBTOTAL_FIELD),
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_TAX => self::getSqlConversionRevenueSum(self::REVENUE_TAX_FIELD),
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SHIPPING => self::getSqlConversionRevenueSum(self::REVENUE_SHIPPING_FIELD),
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_DISCOUNT => self::getSqlConversionRevenueSum(self::REVENUE_DISCOUNT_FIELD),
Metrics::INDEX_GOAL_ECOMMERCE_ITEMS => "SUM(" . self::LOG_CONVERSION_TABLE . "." . self::ITEMS_COUNT_FIELD . ")",
);
}
private static function getSqlConversionRevenueSum($field)
{
return self::getSqlRevenue('SUM(' . self::LOG_CONVERSION_TABLE . '.' . $field . ')');
}
public static function getSqlRevenue($field)
{
return "ROUND(" . $field . "," . GoalManager::REVENUE_PRECISION . ")";
}
/**
* Helper function that returns an array with common metrics for a given log_visit field distinct values.
*
* The statistics returned are:
* - number of unique visitors
* - number of visits
* - number of actions
* - maximum number of action for a visit
* - sum of the visits' length in sec
* - count of bouncing visits (visits with one page view)
*
* For example if $dimension = 'config_os' it will return the statistics for every distinct Operating systems
* The returned array will have a row per distinct operating systems,
* and a column per stat (nb of visits, max actions, etc)
*
* 'label' Metrics::INDEX_NB_UNIQ_VISITORS Metrics::INDEX_NB_VISITS etc.
* Linux 27 66 ...
* Windows XP 12 ...
* Mac OS 15 36 ...
*
* @param string $dimension Table log_visit field name to be use to compute common stats
* @return DataArray
*/
public function getMetricsFromVisitByDimension($dimension)
{
if (!is_array($dimension)) {
$dimension = array($dimension);
}
if (count($dimension) == 1) {
$dimension = array("label" => reset($dimension));
}
$query = $this->queryVisitsByDimension($dimension);
$metrics = new DataArray();
while ($row = $query->fetch()) {
$metrics->sumMetricsVisits($row["label"], $row);
}
return $metrics;
}
/**
* Executes and returns a query aggregating visit logs, optionally grouping by some dimension. Returns
* a DB statement that can be used to iterate over the result
*
* **Result Set**
*
* The following columns are in each row of the result set:
*
* - **{@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}**: The total number of unique visitors in this group
* of aggregated visits.
* - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits aggregated.
* - **{@link Piwik\Metrics::INDEX_NB_ACTIONS}**: The total number of actions performed in this group of
* aggregated visits.
* - **{@link Piwik\Metrics::INDEX_MAX_ACTIONS}**: The maximum actions perfomred in one visit for this group of
* visits.
* - **{@link Piwik\Metrics::INDEX_SUM_VISIT_LENGTH}**: The total amount of time spent on the site for this
* group of visits.
* - **{@link Piwik\Metrics::INDEX_BOUNCE_COUNT}**: The total number of bounced visits in this group of
* visits.
* - **{@link Piwik\Metrics::INDEX_NB_VISITS_CONVERTED}**: The total number of visits for which at least one
* conversion occurred, for this group of visits.
*
* Additional data can be selected by setting the `$additionalSelects` parameter.
*
* _Note: The metrics returned by this query can be customized by the `$metrics` parameter._
*
* @param array|string $dimensions `SELECT` fields (or just one field) that will be grouped by,
* eg, `'referrer_name'` or `array('referrer_name', 'referrer_keyword')`.
* The metrics retrieved from the query will be specific to combinations
* of these fields. So if `array('referrer_name', 'referrer_keyword')`
* is supplied, the query will aggregate visits for each referrer/keyword
* combination.
* @param bool|string $where Additional condition for the `WHERE` clause. Can be used to filter
* the set of visits that are considered for aggregation.
* @param array $additionalSelects Additional `SELECT` fields that are not included in the group by
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
* @param bool|array $metrics The set of metrics to calculate and return. If false, the query will select
* all of them. The following values can be used:
*
* - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
* - {@link Piwik\Metrics::INDEX_NB_VISITS}
* - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
* - {@link Piwik\Metrics::INDEX_MAX_ACTIONS}
* - {@link Piwik\Metrics::INDEX_SUM_VISIT_LENGTH}
* - {@link Piwik\Metrics::INDEX_BOUNCE_COUNT}
* - {@link Piwik\Metrics::INDEX_NB_VISITS_CONVERTED}
* @param bool|\Piwik\RankingQuery $rankingQuery
* A pre-configured ranking query instance that will be used to limit the result.
* If set, the return value is the array returned by {@link Piwik\RankingQuery::execute()}.
* @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
* {@link Piwik\RankingQuery::execute()}. Read {@link queryVisitsByDimension() this}
* to see what aggregate data is calculated by the query.
* @api
*/
public function queryVisitsByDimension(array $dimensions = array(), $where = false, array $additionalSelects = array(),
$metrics = false, $rankingQuery = false)
{
$tableName = self::LOG_VISIT_TABLE;
$availableMetrics = $this->getVisitsMetricFields();
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::VISIT_DATETIME_FIELD, $where);
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
$orderBy = false;
if ($rankingQuery) {
$orderBy = '`' . Metrics::INDEX_NB_VISITS . '` DESC';
}
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
if ($rankingQuery) {
unset($availableMetrics[Metrics::INDEX_MAX_ACTIONS]);
$sumColumns = array_keys($availableMetrics);
if ($metrics) {
$sumColumns = array_intersect($sumColumns, $metrics);
}
$rankingQuery->addColumn($sumColumns, 'sum');
if ($this->isMetricRequested(Metrics::INDEX_MAX_ACTIONS, $metrics)) {
$rankingQuery->addColumn(Metrics::INDEX_MAX_ACTIONS, 'max');
}
return $rankingQuery->execute($query['sql'], $query['bind']);
}
return $this->getDb()->query($query['sql'], $query['bind']);
}
protected function getSelectsMetrics($metricsAvailable, $metricsRequested = false)
{
$selects = array();
foreach ($metricsAvailable as $metricId => $statement) {
if ($this->isMetricRequested($metricId, $metricsRequested)) {
$aliasAs = $this->getSelectAliasAs($metricId);
$selects[] = $statement . $aliasAs;
}
}
return $selects;
}
protected function getSelectStatement($dimensions, $tableName, $additionalSelects, array $availableMetrics, $requestedMetrics = false)
{
$dimensionsToSelect = $this->getDimensionsToSelect($dimensions, $additionalSelects);
$selects = array_merge(
$this->getSelectDimensions($dimensionsToSelect, $tableName),
$this->getSelectsMetrics($availableMetrics, $requestedMetrics),
!empty($additionalSelects) ? $additionalSelects : array()
);
$select = implode(self::FIELDS_SEPARATOR, $selects);
return $select;
}
/**
* Will return the subset of $dimensions that are not found in $additionalSelects
*
* @param $dimensions
* @param array $additionalSelects
* @return array
*/
protected function getDimensionsToSelect($dimensions, $additionalSelects)
{
if (empty($additionalSelects)) {
return $dimensions;
}
$dimensionsToSelect = array();
foreach ($dimensions as $selectAs => $dimension) {
$asAlias = $this->getSelectAliasAs($dimension);
foreach ($additionalSelects as $additionalSelect) {
if (strpos($additionalSelect, $asAlias) === false) {
$dimensionsToSelect[$selectAs] = $dimension;
}
}
}
$dimensionsToSelect = array_unique($dimensionsToSelect);
return $dimensionsToSelect;
}
/**
* Returns the dimensions array, where
* (1) the table name is prepended to the field
* (2) the "AS `label` " is appended to the field
*
* @param $dimensions
* @param $tableName
* @param bool $appendSelectAs
* @param bool $parseSelectAs
* @return mixed
*/
protected function getSelectDimensions($dimensions, $tableName, $appendSelectAs = true)
{
foreach ($dimensions as $selectAs => &$field) {
$selectAsString = $field;
if (!is_numeric($selectAs)) {
$selectAsString = $selectAs;
} else if ($this->isFieldFunctionOrComplexExpression($field)) {
// if complex expression has a select as, use it
if (!$appendSelectAs && preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) {
$field = $matches[1];
continue;
}
// if function w/o select as, do not alias or prefix
$selectAsString = $appendSelectAs = false;
}
$isKnownField = !in_array($field, array('referrer_data'));
if ($selectAsString == $field && $isKnownField) {
$field = $this->prefixColumn($field, $tableName);
}
if ($appendSelectAs && $selectAsString) {
$field = $this->prefixColumn($field, $tableName) . $this->getSelectAliasAs($selectAsString);
}
}
return $dimensions;
}
/**
* Prefixes a column name with a table name if not already done.
*
* @param string $column eg, 'location_provider'
* @param string $tableName eg, 'log_visit'
* @return string eg, 'log_visit.location_provider'
*/
private function prefixColumn($column, $tableName)
{
if (strpos($column, '.') === false) {
return $tableName . '.' . $column;
} else {
return $column;
}
}
protected function isFieldFunctionOrComplexExpression($field)
{
return strpos($field, "(") !== false
|| strpos($field, "CASE") !== false;
}
protected function getSelectAliasAs($metricId)
{
return " AS `" . $metricId . "`";
}
protected function isMetricRequested($metricId, $metricsRequested)
{
// do not process INDEX_NB_UNIQ_FINGERPRINTS unless specifically asked for
if ($metricsRequested === false) {
if ($metricId == Metrics::INDEX_NB_UNIQ_FINGERPRINTS) {
return false;
}
return true;
}
return in_array($metricId, $metricsRequested);
}
public function getWhereStatement($tableName, $datetimeField, $extraWhere = false)
{
$where = "$tableName.$datetimeField >= ?
AND $tableName.$datetimeField <= ?
AND $tableName.idsite IN (". Common::getSqlStringFieldsArray($this->sites) . ")";
if (!empty($extraWhere)) {
$extraWhere = sprintf($extraWhere, $tableName, $tableName);
$where .= ' AND ' . $extraWhere;
}
return $where;
}
protected function getGroupByStatement($dimensions, $tableName)
{
$dimensions = $this->getSelectDimensions($dimensions, $tableName, $appendSelectAs = false);
$groupBy = implode(", ", $dimensions);
return $groupBy;
}
/**
* Returns general bind parameters for all log aggregation queries. This includes the datetime
* start of entities, datetime end of entities and IDs of all sites.
*
* @return array
*/
public function getGeneralQueryBindParams()
{
$bind = array($this->dateStart->toString(Date::DATE_TIME_FORMAT), $this->dateEnd->toString(Date::DATE_TIME_FORMAT));
$bind = array_merge($bind, $this->sites);
return $bind;
}
/**
* Executes and returns a query aggregating ecommerce item data (everything stored in the
* **log\_conversion\_item** table) and returns a DB statement that can be used to iterate over the result
*
* <a name="queryEcommerceItems-result-set"></a>
* **Result Set**
*
* Each row of the result set represents an aggregated group of ecommerce items. The following
* columns are in each row of the result set:
*
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_REVENUE}**: The total revenue for the group of items.
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY}**: The total number of items in this group.
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_PRICE}**: The total price for the group of items.
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ORDERS}**: The total number of orders this group of items
* belongs to. This will be <= to the total number
* of items in this group.
* - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits that caused these items to be logged.
* - **ecommerceType**: Either {@link Piwik\Tracker\GoalManager::IDGOAL_CART} if the items in this group were
* abandoned by a visitor, or {@link Piwik\Tracker\GoalManager::IDGOAL_ORDER} if they
* were ordered by a visitor.
*
* **Limitations**
*
* Segmentation is not yet supported for this aggregation method.
*
* @param string $dimension One or more **log\_conversion\_item** columns to group aggregated data by.
* Eg, `'idaction_sku'` or `'idaction_sku, idaction_category'`.
* @return \Zend_Db_Statement A statement object that can be used to iterate through the query's
* result set. See [above](#queryEcommerceItems-result-set) to learn more
* about what this query selects.
* @api
*/
public function queryEcommerceItems($dimension)
{
$query = $this->generateQuery(
// SELECT ...
implode(
', ',
array(
"log_action.name AS label",
sprintf("log_conversion_item.%s AS labelIdAction", $dimension),
sprintf(
'%s AS `%d`',
self::getSqlRevenue('SUM(log_conversion_item.quantity * log_conversion_item.price)'),
Metrics::INDEX_ECOMMERCE_ITEM_REVENUE
),
sprintf(
'%s AS `%d`',
self::getSqlRevenue('SUM(log_conversion_item.quantity)'),
Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY
),
sprintf(
'%s AS `%d`',
self::getSqlRevenue('SUM(log_conversion_item.price)'),
Metrics::INDEX_ECOMMERCE_ITEM_PRICE
),
sprintf(
'COUNT(distinct log_conversion_item.idorder) AS `%d`',
Metrics::INDEX_ECOMMERCE_ORDERS
),
sprintf(
'COUNT(distinct log_conversion_item.idvisit) AS `%d`',
Metrics::INDEX_NB_VISITS
),
sprintf(
'CASE log_conversion_item.idorder WHEN \'0\' THEN %d ELSE %d END AS ecommerceType',
GoalManager::IDGOAL_CART,
GoalManager::IDGOAL_ORDER
)
)
),
// FROM ...
array(
"log_conversion_item",
array(
"table" => "log_action",
"joinOn" => sprintf("log_conversion_item.%s = log_action.idaction", $dimension)
)
),
// WHERE ... AND ...
implode(
' AND ',
array(
'log_conversion_item.server_time >= ?',
'log_conversion_item.server_time <= ?',
'log_conversion_item.idsite IN (' . Common::getSqlStringFieldsArray($this->sites) . ')',
'log_conversion_item.deleted = 0'
)
),
// GROUP BY ...
sprintf(
"ecommerceType, log_conversion_item.%s",
$dimension
),
// ORDER ...
false
);
return $this->getDb()->query($query['sql'], $query['bind']);
}
/**
* Executes and returns a query aggregating action data (everything in the log_action table) and returns
* a DB statement that can be used to iterate over the result
*
* <a name="queryActionsByDimension-result-set"></a>
* **Result Set**
*
* Each row of the result set represents an aggregated group of actions. The following columns
* are in each aggregate row:
*
* - **{@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}**: The total number of unique visitors that performed
* the actions in this group.
* - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits these actions belong to.
* - **{@link Piwik\Metrics::INDEX_NB_ACTIONS}**: The total number of actions in this aggregate group.
*
* Additional data can be selected through the `$additionalSelects` parameter.
*
* _Note: The metrics calculated by this query can be customized by the `$metrics` parameter._
*
* @param array|string $dimensions One or more SELECT fields that will be used to group the log_action
* rows by. This parameter determines which log_action rows will be
* aggregated together.
* @param bool|string $where Additional condition for the WHERE clause. Can be used to filter
* the set of visits that are considered for aggregation.
* @param array $additionalSelects Additional SELECT fields that are not included in the group by
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
* @param bool|array $metrics The set of metrics to calculate and return. If `false`, the query will select
* all of them. The following values can be used:
*
* - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
* - {@link Piwik\Metrics::INDEX_NB_VISITS}
* - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
* @param bool|\Piwik\RankingQuery $rankingQuery
* A pre-configured ranking query instance that will be used to limit the result.
* If set, the return value is the array returned by {@link Piwik\RankingQuery::execute()}.
* @param bool|string $joinLogActionOnColumn One or more columns from the **log_link_visit_action** table that
* log_action should be joined on. The table alias used for each join
* is `"log_action$i"` where `$i` is the index of the column in this
* array.
*
* If a string is used for this parameter, the table alias is not
* suffixed (since there is only one column).
* @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
* {@link Piwik\RankingQuery::execute()}. Read [this](#queryEcommerceItems-result-set)
* to see what aggregate data is calculated by the query.
* @api
*/
public function queryActionsByDimension($dimensions, $where = '', $additionalSelects = array(), $metrics = false, $rankingQuery = null, $joinLogActionOnColumn = false)
{
$tableName = self::LOG_ACTIONS_TABLE;
$availableMetrics = $this->getActionsMetricFields();
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::ACTION_DATETIME_FIELD, $where);
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
$orderBy = false;
if ($joinLogActionOnColumn !== false) {
$multiJoin = is_array($joinLogActionOnColumn);
if (!$multiJoin) {
$joinLogActionOnColumn = array($joinLogActionOnColumn);
}
foreach ($joinLogActionOnColumn as $i => $joinColumn) {
$tableAlias = 'log_action' . ($multiJoin ? $i + 1 : '');
if (strpos($joinColumn, ' ') === false) {
$joinOn = $tableAlias . '.idaction = ' . $tableName . '.' . $joinColumn;
} else {
// more complex join column like if (...)
$joinOn = $tableAlias . '.idaction = ' . $joinColumn;
}
$from[] = array(
'table' => 'log_action',
'tableAlias' => $tableAlias,
'joinOn' => $joinOn
);
}
}
if ($rankingQuery) {
$orderBy = '`' . Metrics::INDEX_NB_ACTIONS . '` DESC';
}
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
if ($rankingQuery !== null) {
$sumColumns = array_keys($availableMetrics);
if ($metrics) {
$sumColumns = array_intersect($sumColumns, $metrics);
}
$rankingQuery->addColumn($sumColumns, 'sum');
return $rankingQuery->execute($query['sql'], $query['bind']);
}
return $this->getDb()->query($query['sql'], $query['bind']);
}
protected function getActionsMetricFields()
{
return array(
Metrics::INDEX_NB_VISITS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisit)",
Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisitor)",
Metrics::INDEX_NB_ACTIONS => "count(*)",
);
}
/**
* Executes a query aggregating conversion data (everything in the **log_conversion** table) and returns
* a DB statement that can be used to iterate over the result.
*
* <a name="queryConversionsByDimension-result-set"></a>
* **Result Set**
*
* Each row of the result set represents an aggregated group of conversions. The
* following columns are in each aggregate row:
*
* - **{@link Piwik\Metrics::INDEX_GOAL_NB_CONVERSIONS}**: The total number of conversions in this aggregate
* group.
* - **{@link Piwik\Metrics::INDEX_GOAL_NB_VISITS_CONVERTED}**: The total number of visits during which these
* conversions were converted.
* - **{@link Piwik\Metrics::INDEX_GOAL_REVENUE}**: The total revenue generated by these conversions. This value
* includes the revenue from individual ecommerce items.
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SUBTOTAL}**: The total cost of all ecommerce items sold
* within these conversions. This value does not
* include tax, shipping or any applied discount.
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_TAX}**: The total tax applied to every transaction in these
* conversions.
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SHIPPING}**: The total shipping cost for every transaction
* in these conversions.
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_DISCOUNT}**: The total discount applied to every transaction
* in these conversions.
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_ITEMS}**: The total number of ecommerce items sold in each transaction
* in these conversions.
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
*
* Additional data can be selected through the `$additionalSelects` parameter.
*
* _Note: This method will only query the **log_conversion** table. Other tables cannot be joined
* using this method._
*
* @param array|string $dimensions One or more **SELECT** fields that will be used to group the log_conversion
* rows by. This parameter determines which **log_conversion** rows will be
* aggregated together.
* @param bool|string $where An optional SQL expression used in the SQL's **WHERE** clause.
* @param array $additionalSelects Additional SELECT fields that are not included in the group by
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
* @return \Zend_Db_Statement
*/
public function queryConversionsByDimension($dimensions = array(), $where = false, $additionalSelects = array(), $extraFrom = [])
{
$dimensions = array_merge(array(self::IDGOAL_FIELD), $dimensions);
$tableName = self::LOG_CONVERSION_TABLE;
$availableMetrics = $this->getConversionsMetricFields();
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics);
$from = array_merge([$tableName], $extraFrom);
$where = $this->getWhereStatement($tableName, self::CONVERSION_DATETIME_FIELD, $where);
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
$orderBy = false;
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
return $this->getDb()->query($query['sql'], $query['bind']);
}
/**
* Creates and returns an array of SQL `SELECT` expressions that will each count how
* many rows have a column whose value is within a certain range.
*
* **Note:** The result of this function is meant for use in the `$additionalSelects` parameter
* in one of the query... methods (for example {@link queryVisitsByDimension()}).
*
* **Example**
*
* // summarize one column
* $visitTotalActionsRanges = array(
* array(1, 1),
* array(2, 10),
* array(10)
* );
* $selects = LogAggregator::getSelectsFromRangedColumn('visit_total_actions', $visitTotalActionsRanges, 'log_visit', 'vta');
*
* // summarize another column in the same request
* $visitCountVisitsRanges = array(
* array(1, 1),
* array(2, 20),
* array(20)
* );
* $selects = array_merge(
* $selects,
* LogAggregator::getSelectsFromRangedColumn('visitor_count_visits', $visitCountVisitsRanges, 'log_visit', 'vcv')
* );
*
* // perform the query
* $logAggregator = // get the LogAggregator somehow
* $query = $logAggregator->queryVisitsByDimension($dimensions = array(), $where = false, $selects);
* $tableSummary = $query->fetch();
*
* $numberOfVisitsWithOneAction = $tableSummary['vta0'];
* $numberOfVisitsBetweenTwoAnd10 = $tableSummary['vta1'];
*
* $numberOfVisitsWithVisitCountOfOne = $tableSummary['vcv0'];
*
* @param string $column The name of a column in `$table` that will be summarized.
* @param array $ranges The array of ranges over which the data in the table
* will be summarized. For example,
* ```
* array(
* array(1, 1),
* array(2, 2),
* array(3, 8),
* array(8) // everything over 8
* )
* ```
* @param string $table The unprefixed name of the table whose rows will be summarized.
* @param string $selectColumnPrefix The prefix to prepend to each SELECT expression. This
* prefix is used to differentiate different sets of
* range summarization SELECTs. You can supply different
* values to this argument to summarize several columns
* in one query (see above for an example).
* @param bool $restrictToReturningVisitors Whether to only summarize rows that belong to
* visits of returning visitors or not. If this
* argument is true, then the SELECT expressions
* returned can only be used with the
* {@link queryVisitsByDimension()} method.
* @return array An array of SQL SELECT expressions, for example,
* ```
* array(
* 'sum(case when log_visit.visit_total_actions between 0 and 2 then 1 else 0 end) as vta0',
* 'sum(case when log_visit.visit_total_actions > 2 then 1 else 0 end) as vta1'
* )
* ```
* @api
*/
public static function getSelectsFromRangedColumn($column, $ranges, $table, $selectColumnPrefix, $restrictToReturningVisitors = false)
{
$selects = array();
$extraCondition = '';
if ($restrictToReturningVisitors) {
// extra condition for the SQL SELECT that makes sure only returning visits are counted
// when creating the 'days since last visit' report
$extraCondition = 'and log_visit.visitor_returning = 1';
$extraSelect = "sum(case when log_visit.visitor_returning = 0 then 1 else 0 end) "
. " as `" . $selectColumnPrefix . 'General_NewVisits' . "`";
$selects[] = $extraSelect;
}
foreach ($ranges as $gap) {
if (count($gap) == 2) {
$lowerBound = $gap[0];
$upperBound = $gap[1];
$selectAs = "$selectColumnPrefix$lowerBound-$upperBound";
$selects[] = "sum(case when $table.$column between $lowerBound and $upperBound $extraCondition" .
" then 1 else 0 end) as `$selectAs`";
} else {
$lowerBound = $gap[0];
$selectAs = $selectColumnPrefix . ($lowerBound + 1) . urlencode('+');
$selects[] = "sum(case when $table.$column > $lowerBound $extraCondition then 1 else 0 end) as `$selectAs`";
}
}
return $selects;
}
/**
* Clean up the row data and return values.
* $lookForThisPrefix can be used to make sure only SOME of the data in $row is used.
*
* The array will have one column $columnName
*
* @param $row
* @param $columnName
* @param bool $lookForThisPrefix A string that identifies which elements of $row to use
* in the result. Every key of $row that starts with this
* value is used.
* @return array
*/
public static function makeArrayOneColumn($row, $columnName, $lookForThisPrefix = false)
{
$cleanRow = array();
foreach ($row as $label => $count) {
if (empty($lookForThisPrefix)
|| strpos($label, $lookForThisPrefix) === 0
) {
$cleanLabel = substr($label, strlen($lookForThisPrefix));
$cleanRow[$cleanLabel] = array($columnName => $count);
}
}
return $cleanRow;
}
public function getDb()
{
return Db::get();
}
}

View File

@ -0,0 +1,301 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Exception;
use Piwik\DataAccess\LogQueryBuilder\JoinGenerator;
use Piwik\DataAccess\LogQueryBuilder\JoinTables;
use Piwik\Plugin\LogTablesProvider;
use Piwik\Segment\SegmentExpression;
class LogQueryBuilder
{
/**
* @var LogTablesProvider
*/
private $logTableProvider;
/**
* Forces to use a subselect when generating the query. Set value to `false` to force not using a subselect.
* @var string
*/
private $forcedInnerGroupBy = '';
public function __construct(LogTablesProvider $logTablesProvider)
{
$this->logTableProvider = $logTablesProvider;
}
/**
* Forces to use a subselect when generating the query.
* @var string
*/
public function forceInnerGroupBySubselect($innerGroupBy)
{
$this->forcedInnerGroupBy = $innerGroupBy;
}
public function getSelectQueryString(SegmentExpression $segmentExpression, $select, $from, $where, $bind, $groupBy,
$orderBy, $limitAndOffset)
{
if (!is_array($from)) {
$from = array($from);
}
$fromInitially = $from;
if (!$segmentExpression->isEmpty()) {
$segmentExpression->parseSubExpressionsIntoSqlExpressions($from);
$segmentSql = $segmentExpression->getSql();
$where = $this->getWhereMatchBoth($where, $segmentSql['where']);
$bind = array_merge($bind, $segmentSql['bind']);
}
$tables = new JoinTables($this->logTableProvider, $from);
$join = new JoinGenerator($tables);
$join->generate();
$from = $join->getJoinString();
$joinWithSubSelect = $join->shouldJoinWithSelect();
// hack for https://github.com/piwik/piwik/issues/9194#issuecomment-164321612
$useSpecialConversionGroupBy = (!empty($segmentSql)
&& strpos($groupBy, 'log_conversion.idgoal') !== false
&& $fromInitially == array('log_conversion')
&& strpos($from, 'log_link_visit_action') !== false);
if (!empty($this->forcedInnerGroupBy)) {
$sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, $tables, $this->forcedInnerGroupBy);
} elseif ($useSpecialConversionGroupBy) {
$innerGroupBy = "CONCAT(log_conversion.idvisit, '_' , log_conversion.idgoal, '_', log_conversion.buster)";
$sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, $tables, $innerGroupBy);
} elseif ($joinWithSubSelect) {
$sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, $tables);
} else {
$sql = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset);
}
return array(
'sql' => $sql,
'bind' => $bind
);
}
private function getKnownTables()
{
$names = array();
foreach ($this->logTableProvider->getAllLogTables() as $logTable) {
$names[] = $logTable->getName();
}
return $names;
}
/**
* Build a select query where actions have to be joined on visits (or conversions)
* In this case, the query gets wrapped in another query so that grouping by visit is possible
* @param string $select
* @param string $from
* @param string $where
* @param string $groupBy
* @param string $orderBy
* @param string $limitAndOffset
* @param null|string $innerGroupBy If given, this inner group by will be used. If not, we try to detect one
* @throws Exception
* @return string
*/
private function buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, JoinTables $tables, $innerGroupBy = null)
{
$matchTables = $this->getKnownTables();
foreach ($tables as $table) {
if (is_array($table) && isset($table['tableAlias']) && !in_array($table['tableAlias'], $matchTables, $strict = true)) {
$matchTables[] = $table['tableAlias'];
} elseif (is_array($table) && isset($table['table']) && !in_array($table['table'], $matchTables, $strict = true)) {
$matchTables[] = $table['table'];
} elseif (is_string($table) && !in_array($table, $matchTables, $strict = true)) {
$matchTables[] = $table;
}
}
$matchTables = '(' . implode('|', $matchTables) . ')';
preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches);
$neededFields = array_unique($matches[0]);
if (count($neededFields) == 0) {
throw new Exception("No needed fields found in select expression. "
. "Please use a table prefix.");
}
$fieldNames = array();
$toBeReplaced = array();
$epregReplace = array();
foreach ($neededFields as &$neededField) {
$parts = explode('.', $neededField);
if (count($parts) === 2 && !empty($parts[1])) {
if (in_array($parts[1], $fieldNames, $strict = true)) {
// eg when selecting 2 dimensions log_action_X.name
$columnAs = $parts[1] . md5($neededField);
$fieldNames[] = $columnAs;
// we make sure to not replace a idvisitor column when duplicate column is idvisit
$toBeReplaced[$neededField . ' '] = $parts[0] . '.' . $columnAs . ' ';
$toBeReplaced[$neededField . ')'] = $parts[0] . '.' . $columnAs . ')';
$toBeReplaced[$neededField . '`'] = $parts[0] . '.' . $columnAs . '`';
$toBeReplaced[$neededField . ','] = $parts[0] . '.' . $columnAs . ',';
// replace when string ends this, we need to use regex to check for this
$epregReplace["/(" . $neededField . ")$/"] = $parts[0] . '.' . $columnAs;
$neededField .= ' as ' . $columnAs;
} else {
$fieldNames[] = $parts[1];
}
}
}
preg_match_all("/". $matchTables . "/", $from, $matchesFrom);
$innerSelect = implode(", \n", $neededFields);
$innerFrom = $from;
$innerWhere = $where;
$innerLimitAndOffset = $limitAndOffset;
$innerOrderBy = "NULL";
if ($innerLimitAndOffset && $orderBy) {
// only When LIMITing we can apply to the inner query the same ORDER BY as the parent query
$innerOrderBy = $orderBy;
}
if ($innerLimitAndOffset) {
// When LIMITing, no need to GROUP BY (GROUPing by is done before the LIMIT which is super slow when large amount of rows is matched)
$innerGroupBy = false;
}
if (!isset($innerGroupBy) && in_array('log_visit', $matchesFrom[1])) {
$innerGroupBy = "log_visit.idvisit";
} elseif (!isset($innerGroupBy)) {
throw new Exception('Cannot use subselect for join as no group by rule is specified');
}
if (!empty($toBeReplaced)) {
$select = preg_replace(array_keys($epregReplace), array_values($epregReplace), $select);
$select = str_replace(array_keys($toBeReplaced), array_values($toBeReplaced), $select);
if (!empty($groupBy)) {
$groupBy = preg_replace(array_keys($epregReplace), array_values($epregReplace), $groupBy);
$groupBy = str_replace(array_keys($toBeReplaced), array_values($toBeReplaced), $groupBy);
}
if (!empty($orderBy)) {
$orderBy = preg_replace(array_keys($epregReplace), array_values($epregReplace), $orderBy);
$orderBy = str_replace(array_keys($toBeReplaced), array_values($toBeReplaced), $orderBy);
}
}
$innerQuery = $this->buildSelectQuery($innerSelect, $innerFrom, $innerWhere, $innerGroupBy, $innerOrderBy, $innerLimitAndOffset);
$select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select);
$from = "
(
$innerQuery
) AS log_inner";
$where = false;
$orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy);
$groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy);
$outerLimitAndOffset = null;
$query = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $outerLimitAndOffset);
return $query;
}
/**
* Build select query the normal way
*
* @param string $select fieldlist to be selected
* @param string $from tablelist to select from
* @param string $where where clause
* @param string $groupBy group by clause
* @param string $orderBy order by clause
* @param string|int $limitAndOffset limit by clause eg '5' for Limit 5 Offset 0 or '10, 5' for Limit 5 Offset 10
* @return string
*/
private function buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset)
{
$sql = "
SELECT
$select
FROM
$from";
if ($where) {
$sql .= "
WHERE
$where";
}
if ($groupBy) {
$sql .= "
GROUP BY
$groupBy";
}
if ($orderBy) {
$sql .= "
ORDER BY
$orderBy";
}
$sql = $this->appendLimitClauseToQuery($sql, $limitAndOffset);
return $sql;
}
/**
* @param $sql
* @param $limit LIMIT clause eg. "10, 50" (offset 10, limit 50)
* @return string
*/
private function appendLimitClauseToQuery($sql, $limit)
{
$limitParts = explode(',', (string) $limit);
$isLimitWithOffset = 2 === count($limitParts);
if ($isLimitWithOffset) {
// $limit = "10, 5". We would not have to do this but we do to prevent possible injections.
$offset = trim($limitParts[0]);
$limit = trim($limitParts[1]);
$sql .= sprintf(' LIMIT %d, %d', $offset, $limit);
} else {
// $limit = "5"
$limit = (int)$limit;
if ($limit >= 1) {
$sql .= " LIMIT $limit";
}
}
return $sql;
}
/**
* @param $where
* @param $segmentWhere
* @return string
* @throws
*/
protected function getWhereMatchBoth($where, $segmentWhere)
{
if (empty($segmentWhere) && empty($where)) {
throw new \Exception("Segment where clause should be non empty.");
}
if (empty($segmentWhere)) {
return $where;
}
if (empty($where)) {
return $segmentWhere;
}
return "( $where )
AND
($segmentWhere)";
}
}

View File

@ -0,0 +1,316 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess\LogQueryBuilder;
use Exception;
use Piwik\Common;
use Piwik\Tracker\LogTable;
class JoinGenerator
{
/**
* @var JoinTables
*/
protected $tables;
/**
* @var bool
*/
private $joinWithSubSelect = false;
/**
* @var string
*/
private $joinString = '';
/**
* @var array
*/
private $nonVisitJoins = array();
public function __construct(JoinTables $tables)
{
$this->tables = $tables;
$this->addMissingTablesNeededForJoins();
}
private function addMissingTablesNeededForJoins()
{
foreach ($this->tables as $index => $table) {
if (is_array($table)) {
continue;
}
$logTable = $this->tables->getLogTable($table);
if (!$logTable->getColumnToJoinOnIdVisit()) {
$tableNameToJoin = $logTable->getLinkTableToBeAbleToJoinOnVisit();
if (empty($tableNameToJoin) && $logTable->getWaysToJoinToOtherLogTables()) {
foreach ($logTable->getWaysToJoinToOtherLogTables() as $otherLogTable => $column) {
if ($this->tables->hasJoinedTable($otherLogTable)) {
$this->tables->addTableDependency($table, $otherLogTable);
continue;
}
if ($this->tables->isTableJoinableOnVisit($otherLogTable) || $this->tables->isTableJoinableOnAction($otherLogTable)) {
$this->addMissingTablesForOtherTableJoin($otherLogTable, $table);
}
}
continue;
}
if ($index > 0 && !$this->tables->hasJoinedTable($tableNameToJoin)) {
$this->tables->addTableToJoin($tableNameToJoin);
}
if ($this->tables->hasJoinedTable($tableNameToJoin)) {
$this->generateNonVisitJoins($table, $tableNameToJoin, $index);
}
}
}
foreach ($this->tables as $index => $table) {
if (is_array($table)) {
if (!isset($table['tableAlias'])) {
$tableName = $table['table'];
$numTables = count($this->tables);
for ($j = $index + 1; $j < $numTables; $j++) {
if (!isset($this->tables[$j])) {
continue;
}
$tableOther = $this->tables[$j];
if (is_string($tableOther) && $tableOther === $tableName) {
unset($this->tables[$j]);
}
}
}
} elseif (is_string($table)) {
$numTables = count($this->tables);
for ($j = $index + 1; $j < $numTables; $j++) {
if (isset($this->tables[$j]) && is_array($this->tables[$j]) && !isset($this->tables[$j]['tableAlias'])) {
$tableOther = $this->tables[$j];
if ($table === $tableOther['table']) {
$message = sprintf('Please reorganize the joined tables as the table %s in %s cannot be joined correctly. We recommend to join tables with arrays first. %s', $table, json_encode($this->tables), json_encode(debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 10)));
throw new Exception($message);
}
}
}
}
}
}
private function addMissingTablesForOtherTableJoin($tableName, $dependentTable)
{
$this->tables->addTableDependency($dependentTable, $tableName);
if ($this->tables->hasJoinedTable($tableName)) {
return;
}
$table = $this->tables->getLogTable($tableName);
if ($table->getColumnToJoinOnIdAction() || $table->getColumnToJoinOnIdAction() || $table->getLinkTableToBeAbleToJoinOnVisit()) {
$this->tables->addTableToJoin($tableName);
return;
}
$otherTableJoins = $table->getWaysToJoinToOtherLogTables();
foreach ($otherTableJoins as $logTable => $column) {
$this->addMissingTablesForOtherTableJoin($logTable, $tableName);
}
$this->tables->addTableToJoin($tableName);
}
/**
* Generate the join sql based on the needed tables
* @throws Exception if tables can't be joined
* @return array
*/
public function generate()
{
/** @var LogTable[] $availableLogTables */
$availableLogTables = array();
$this->tables->sort();
foreach ($this->tables as $i => $table) {
if (is_array($table)) {
// join condition provided
$alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table'];
if (isset($table['join'])) {
$this->joinString .= ' ' . $table['join'];
} else {
$this->joinString .= ' LEFT JOIN';
}
if (!isset($table['joinOn']) && $this->tables->getLogTable($table['table'])) {
$logTable = $this->tables->getLogTable($table['table']);
if (!empty($availableLogTables)) {
$table['joinOn'] = $this->findJoinCriteriasForTables($logTable, $availableLogTables);
}
if (!isset($table['tableAlias'])) {
// eg array('table' => 'log_link_visit_action', 'join' => 'RIGHT JOIN')
// we treat this like a regular string table which we can join automatically
$availableLogTables[$table['table']] = $logTable;
}
}
$this->joinString .= ' ' . Common::prefixTable($table['table']) . " AS " . $alias
. " ON " . $table['joinOn'];
continue;
}
$tableSql = Common::prefixTable($table) . " AS $table";
$logTable = $this->tables->getLogTable($table);
if ($i == 0) {
// first table
$this->joinString .= $tableSql;
} else {
$join = $this->findJoinCriteriasForTables($logTable, $availableLogTables);
if ($join === null) {
$availableLogTables[$table] = $logTable;
continue;
}
// the join sql the default way
$this->joinString .= " LEFT JOIN $tableSql ON " . $join;
}
$availableLogTables[$table] = $logTable;
}
}
public function getJoinString()
{
return $this->joinString;
}
public function shouldJoinWithSelect()
{
return $this->joinWithSubSelect;
}
/**
* @param LogTable $logTable
* @param LogTable[] $availableLogTables
* @return string|null returns null in case the table is already joined, or the join string if the table needs
* to be joined
* @throws Exception if table cannot be joined for segmentation
*/
public function findJoinCriteriasForTables(LogTable $logTable, $availableLogTables)
{
$join = null;
$alternativeJoin = null;
$table = $logTable->getName();
foreach ($availableLogTables as $availableLogTable) {
if ($logTable->getColumnToJoinOnIdVisit() && $availableLogTable->getColumnToJoinOnIdVisit()) {
$join = sprintf("%s.%s = %s.%s", $table, $logTable->getColumnToJoinOnIdVisit(),
$availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit());
$alternativeJoin = sprintf("%s.%s = %s.%s", $availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit(),
$table, $logTable->getColumnToJoinOnIdVisit());
if ($availableLogTable->shouldJoinWithSubSelect()) {
$this->joinWithSubSelect = true;
}
break;
}
if ($logTable->getColumnToJoinOnIdAction() && $availableLogTable->getColumnToJoinOnIdAction()) {
if (isset($this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()])) {
$join = $this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()];
}
break;
}
$otherJoins = $logTable->getWaysToJoinToOtherLogTables();
foreach ($otherJoins as $joinTable => $column) {
if($availableLogTable->getName() == $joinTable) {
$join = sprintf("`%s`.`%s` = `%s`.`%s`", $table, $column, $availableLogTable->getName(), $column);
break;
}
}
}
if (!isset($join)) {
throw new Exception("Table '$table' can't be joined for segmentation");
}
if ($this->tables->hasJoinedTableManually($table, $join)
|| $this->tables->hasJoinedTableManually($table, $alternativeJoin)) {
// already joined, no need to join it again
return null;
}
return $join;
}
/**
* This code is a bit tricky. We have to execute this right at the beginning before actually iterating over all the
* tables and generating the join string as we may have to delete a table from the tables. If we did not delete
* this table upfront, we would have maybe already added a joinString for that table, even though it will be later
* removed by another table. This means if we wouldn't delete/unset that table upfront, we would need to alter
* an already generated join string which would not be really nice code as well.
*
* Next problem is, because we are deleting a table, we have to remember the "joinOn" string for that table in a
* property "nonVisitJoins". Otherwise we would not be able to generate the correct "joinOn" string when actually
* iterating over all the tables to generate that string.
*
* @param $tableName
* @param $tableNameToJoin
* @param $index
*/
protected function generateNonVisitJoins($tableName, $tableNameToJoin, $index)
{
$logTable = $this->tables->getLogTable($tableName);
$logTableToJoin = $this->tables->getLogTable($tableNameToJoin);
$nonVisitJoin = sprintf("%s.%s = %s.%s", $logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction(),
$tableName, $logTable->getColumnToJoinOnIdAction());
$altNonVisitJoin = sprintf("%s.%s = %s.%s", $tableName, $logTable->getColumnToJoinOnIdAction(),
$logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction());
if ($index > 0
&& $this->tables->hasAddedTableManually($tableName)
&& !$this->tables->hasJoinedTableManually($tableName, $nonVisitJoin)
&& !$this->tables->hasJoinedTableManually($tableName, $altNonVisitJoin)) {
$tableIndex = $this->tables->findIndexOfManuallyAddedTable($tableName);
$nonVisitJoin = '(' . $this->tables[$tableIndex]['joinOn'] . ' AND ' . $nonVisitJoin . ')';
unset($this->tables[$tableIndex]);
}
if (!isset($this->nonVisitJoins[$tableName])) {
$this->nonVisitJoins[$tableName] = array();
}
if (!isset($this->nonVisitJoins[$tableNameToJoin])) {
$this->nonVisitJoins[$tableNameToJoin] = array();
}
$this->nonVisitJoins[$tableName][$tableNameToJoin] = $nonVisitJoin;
$this->nonVisitJoins[$tableNameToJoin][$tableName] = $nonVisitJoin;
}
}

View File

@ -0,0 +1,358 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess\LogQueryBuilder;
use Exception;
use Piwik\Plugin\LogTablesProvider;
class JoinTables extends \ArrayObject
{
/**
* @var LogTablesProvider
*/
private $logTableProvider;
// NOTE: joins can be specified explicitly as arrays w/ 'joinOn' keys or implicitly as table names. when
// table names are used, the joins dependencies are assumed based on how we want to order those joins.
// the below table list the possible dependencies of each table, and is specifically designed to enforce
// the following order:
// log_link_visit_action, log_action, log_visit, log_conversion, log_conversion_item
// which means if an array is supplied where log_visit comes before log_link_visitAction, it will
// be moved to after it.
private $implicitTableDependencies = [
'log_link_visit_action' => [
// empty
],
'log_action' => [
'log_link_visit_action',
'log_conversion',
'log_conversion_item',
'log_visit',
],
'log_visit' => [
'log_link_visit_action',
'log_action',
],
'log_conversion' => [
'log_link_visit_action',
'log_action',
'log_visit',
],
'log_conversion_item' => [
'log_link_visit_action',
'log_action',
'log_visit',
'log_conversion',
],
];
/**
* Tables constructor.
* @param LogTablesProvider $logTablesProvider
* @param array $tables
*/
public function __construct(LogTablesProvider $logTablesProvider, $tables)
{
$this->logTableProvider = $logTablesProvider;
foreach ($tables as $table) {
$this->checkTableCanBeUsedForSegmentation($table);
}
$this->exchangeArray(array_values($tables));
}
public function getTables()
{
return $this->getArrayCopy();
}
public function addTableToJoin($tableName)
{
$this->checkTableCanBeUsedForSegmentation($tableName);
$this->append($tableName);
}
public function hasJoinedTable($tableName)
{
$tables = in_array($tableName, $this->getTables());
if ($tables) {
return true;
}
foreach ($this as $table) {
if (is_array($table)) {
if (!isset($table['tableAlias']) && $table['table'] === $table) {
return true;
} elseif (isset($table['tableAlias']) && $table['tableAlias'] === $table) {
return true;
}
}
}
return false;
}
public function hasJoinedTableManually($tableToFind, $joinToFind)
{
foreach ($this as $table) {
if (is_array($table)
&& !empty($table['table'])
&& $table['table'] === $tableToFind
&& (!isset($table['tableAlias']) || $table['tableAlias'] === $tableToFind)
&& (!isset($table['join']) || strtolower($table['join']) === 'left join')
&& isset($table['joinOn']) && $table['joinOn'] === $joinToFind) {
return true;
}
}
return false;
}
public function getLogTable($tableName)
{
return $this->logTableProvider->getLogTable($tableName);
}
public function findIndexOfManuallyAddedTable($tableNameToFind)
{
foreach ($this as $index => $table) {
if (is_array($table)
&& !empty($table['table'])
&& $table['table'] === $tableNameToFind
&& (!isset($table['join']) || strtolower($table['join']) === 'left join')
&& (!isset($table['tableAlias']) || $table['tableAlias'] === $tableNameToFind)) {
return $index;
}
}
}
public function hasAddedTableManually($tableToFind)
{
$table = $this->findIndexOfManuallyAddedTable($tableToFind);
return isset($table);
}
public function sort()
{
// we do not use $this->uasort as we do not want to maintain keys
$tables = $this->getTables();
// the first entry is always the FROM table
$firstTable = array_shift($tables);
$dependencies = $this->parseDependencies($tables);
$sorted = [$firstTable];
$this->visitTableListDfs($tables, $dependencies, function ($tableInfo) use (&$sorted) {
$sorted[] = $tableInfo;
});
$this->exchangeArray($sorted);
}
public function isTableJoinableOnVisit($tableToCheck)
{
$table = $this->getLogTable($tableToCheck);
if (empty($table)) {
return false;
}
if ($table->getColumnToJoinOnIdVisit()) {
return true;
}
if ($table->getLinkTableToBeAbleToJoinOnVisit()) {
return true;
}
$otherWays = $table->getWaysToJoinToOtherLogTables();
if (empty($otherWays)) {
return false;
}
foreach ($otherWays as $logTable => $column) {
if ($logTable == 'log_visit' || $this->isTableJoinableOnVisit($logTable)) {
return true;
}
}
return false;
}
public function isTableJoinableOnAction($tableToCheck)
{
$table = $this->getLogTable($tableToCheck);
if (empty($table)) {
return false;
}
if ($table->getColumnToJoinOnIdAction()) {
return true;
}
$otherWays = $table->getWaysToJoinToOtherLogTables();
if (empty($otherWays)) {
return false;
}
foreach ($otherWays as $logTable => $column) {
if ($logTable == 'log_action' || $this->isTableJoinableOnAction($logTable)) {
return true;
}
}
return false;
}
public function addTableDependency($table, $dependentTable)
{
if (!empty($this->implicitTableDependencies[$table])) {
return;
}
$this->implicitTableDependencies[$table] = [$dependentTable];
}
private function checkTableCanBeUsedForSegmentation($tableName)
{
if (!is_array($tableName) && !$this->getLogTable($tableName)) {
throw new Exception("Table '$tableName' can't be used for segmentation");
}
}
private function parseDependencies(array $tables)
{
$dependencies = [];
foreach ($tables as $key => &$fromInfo) {
if (is_string($fromInfo)) {
$dependencies[$key] = $this->assumeImplicitJoinDependencies($tables, $fromInfo);
continue;
}
if (empty($fromInfo['joinOn'])) {
continue;
}
$table = isset($fromInfo['tableAlias']) ? $fromInfo['tableAlias'] : $fromInfo['table'];
$tablesInExpr = $this->parseSqlTables($fromInfo['joinOn'], $table);
$dependencies[$key] = $tablesInExpr;
}
return $dependencies;
}
private function assumeImplicitJoinDependencies($allTablesToQuery, $table)
{
$implicitTableDependencies = $this->implicitTableDependencies;
$result = [];
if (isset($implicitTableDependencies[$table])) {
$result = $implicitTableDependencies[$table];
// only include dependencies that are in the list of requested tables (ie, if we want to
// query from log_conversion joining on log_link_visit_action, we don't want to add log_visit
// to the sql statement)
$result = array_filter($result, function ($table) use ($allTablesToQuery) {
return $this->isInTableArray($allTablesToQuery, $table);
});
}
return $result;
}
private function isInTableArray($tables, $table)
{
foreach ($tables as $entry) {
if (is_string($entry)
&& $entry == $table
) {
return true;
}
if (is_array($entry)
&& $entry['table'] == $table
) {
return true;
}
}
return false;
}
private function parseSqlTables($joinOn, $self)
{
preg_match_all('/\b([a-zA-Z0-9_`]+)\.[a-zA-Z0-9_`]+\b/', $joinOn, $matches);
$tables = [];
foreach ($matches[1] as $table) {
if ($table === $self) {
continue;
}
$tables[] = $table;
}
return $tables;
}
private function visitTableListDfs($tables, $dependencies, $visitor)
{
$visited = [];
foreach ($tables as $index => $tableInfo) {
if (empty($visited[$index])) {
$this->visitTableListDfsSingle($tables, $dependencies, $visitor, $index, $visited);
}
}
}
private function visitTableListDfsSingle($tables, $dependencies, $visitor, $tableToVisitIndex, &$visited)
{
$visited[$tableToVisitIndex] = true;
$tableToVisit = $tables[$tableToVisitIndex];
if (!empty($dependencies[$tableToVisitIndex])) {
foreach ($dependencies[$tableToVisitIndex] as $dependencyTableName) {
$dependentTableToVisit = $this->findTableIndex($tables, $dependencyTableName);
if ($dependentTableToVisit === null) { // sanity check, in case the dependent table is not in the list of tables to query
continue;
}
if (!empty($visited[$dependentTableToVisit])) { // skip if already visited
continue;
}
// visit dependent table...
$this->visitTableListDfsSingle($tables, $dependencies, $visitor, $dependentTableToVisit, $visited);
}
}
// ...then visit current table
$visitor($tableToVisit);
}
private function findTableIndex($tables, $tableToSearchFor)
{
foreach ($tables as $key => $info) {
$tableName = null;
if (is_string($info)) {
$tableName = $info;
} else if (is_array($info)) {
$tableName = isset($info['tableAlias']) ? $info['tableAlias'] : $info['table'];
}
if ($tableName == $tableToSearchFor) {
return $key;
}
}
return null;
}
}

View File

@ -0,0 +1,359 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Exception;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Common;
use Piwik\Container\StaticContainer;
use Piwik\Db;
use Piwik\DbHelper;
use Piwik\Period;
use Piwik\Segment;
use Piwik\Sequence;
use Psr\Log\LoggerInterface;
/**
* Cleans up outdated archives
*
* @package Piwik\DataAccess
*/
class Model
{
/**
* @var LoggerInterface
*/
private $logger;
public function __construct(LoggerInterface $logger = null)
{
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
}
/**
* Returns the archives IDs that have already been invalidated and have been since re-processed.
*
* These archives { archive name (includes segment hash) , idsite, date, period } will be deleted.
*
* @param string $archiveTable
* @param array $idSites
* @return array
* @throws Exception
*/
public function getInvalidatedArchiveIdsSafeToDelete($archiveTable, array $idSites)
{
try {
Db::get()->query('SET SESSION group_concat_max_len=' . (128 * 1024));
} catch (\Exception $ex) {
$this->logger->info("Could not set group_concat_max_len MySQL session variable.");
}
$idSites = array_map(function ($v) { return (int)$v; }, $idSites);
$sql = "SELECT idsite, date1, date2, period, name,
GROUP_CONCAT(idarchive, '.', value ORDER BY ts_archived DESC) as archives
FROM `$archiveTable`
WHERE name LIKE 'done%'
AND value IN (" . ArchiveWriter::DONE_INVALIDATED . ','
. ArchiveWriter::DONE_OK . ','
. ArchiveWriter::DONE_OK_TEMPORARY . ")
AND idsite IN (" . implode(',', $idSites) . ")
GROUP BY idsite, date1, date2, period, name";
$archiveIds = array();
$rows = Db::fetchAll($sql);
foreach ($rows as $row) {
$duplicateArchives = explode(',', $row['archives']);
$firstArchive = array_shift($duplicateArchives);
list($firstArchiveId, $firstArchiveValue) = explode('.', $firstArchive);
// if the first archive (ie, the newest) is an 'ok' or 'ok temporary' archive, then
// all invalidated archives after it can be deleted
if ($firstArchiveValue == ArchiveWriter::DONE_OK
|| $firstArchiveValue == ArchiveWriter::DONE_OK_TEMPORARY
) {
foreach ($duplicateArchives as $pair) {
if (strpos($pair, '.') === false) {
$this->logger->info("GROUP_CONCAT cut off the query result, you may have to purge archives again.");
break;
}
list($idarchive, $value) = explode('.', $pair);
if ($value == ArchiveWriter::DONE_INVALIDATED) {
$archiveIds[] = $idarchive;
}
}
}
}
return $archiveIds;
}
/**
* @param string $archiveTable Prefixed table name
* @param int[] $idSites
* @param string[][] $datesByPeriodType
* @param Segment $segment
* @return \Zend_Db_Statement
* @throws Exception
*/
public function updateArchiveAsInvalidated($archiveTable, $idSites, $datesByPeriodType, Segment $segment = null)
{
$idSites = array_map('intval', $idSites);
$bind = array();
$periodConditions = array();
foreach ($datesByPeriodType as $periodType => $dates) {
$dateConditions = array();
foreach ($dates as $date) {
$dateConditions[] = "(date1 <= ? AND ? <= date2)";
$bind[] = $date;
$bind[] = $date;
}
$dateConditionsSql = implode(" OR ", $dateConditions);
if (empty($periodType)
|| $periodType == Period\Day::PERIOD_ID
) {
// invalidate all periods if no period supplied or period is day
$periodConditions[] = "($dateConditionsSql)";
} else if ($periodType == Period\Range::PERIOD_ID) {
$periodConditions[] = "(period = " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))";
} else {
// for non-day periods, invalidate greater periods, but not range periods
$periodConditions[] = "(period >= " . (int)$periodType . " AND period < " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))";
}
}
if ($segment) {
$nameCondition = "name LIKE '" . Rules::getDoneFlagArchiveContainsAllPlugins($segment) . "%'";
} else {
$nameCondition = "name LIKE 'done%'";
}
$sql = "UPDATE $archiveTable SET value = " . ArchiveWriter::DONE_INVALIDATED
. " WHERE $nameCondition
AND idsite IN (" . implode(", ", $idSites) . ")
AND (" . implode(" OR ", $periodConditions) . ")";
return Db::query($sql, $bind);
}
public function getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan)
{
$query = "SELECT idarchive FROM " . $archiveTable . "
WHERE name LIKE 'done%'
AND (( value = " . ArchiveWriter::DONE_OK_TEMPORARY . "
AND ts_archived < ?)
OR value = " . ArchiveWriter::DONE_ERROR . ")";
return Db::fetchAll($query, array($purgeArchivesOlderThan));
}
public function deleteArchivesWithPeriod($numericTable, $blobTable, $period, $date)
{
$query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?";
$bind = array($period, $date);
$queryObj = Db::query(sprintf($query, $numericTable), $bind);
$deletedRows = $queryObj->rowCount();
try {
$queryObj = Db::query(sprintf($query, $blobTable), $bind);
$deletedRows += $queryObj->rowCount();
} catch (Exception $e) {
// Individual blob tables could be missing
$this->logger->debug("Unable to delete archives by period from {blobTable}.", array(
'blobTable' => $blobTable,
'exception' => $e,
));
}
return $deletedRows;
}
public function deleteArchiveIds($numericTable, $blobTable, $idsToDelete)
{
$idsToDelete = array_values($idsToDelete);
$query = "DELETE FROM %s WHERE idarchive IN (" . Common::getSqlStringFieldsArray($idsToDelete) . ")";
$queryObj = Db::query(sprintf($query, $numericTable), $idsToDelete);
$deletedRows = $queryObj->rowCount();
try {
$queryObj = Db::query(sprintf($query, $blobTable), $idsToDelete);
$deletedRows += $queryObj->rowCount();
} catch (Exception $e) {
// Individual blob tables could be missing
$this->logger->debug("Unable to delete archive IDs from {blobTable}.", array(
'blobTable' => $blobTable,
'exception' => $e,
));
}
return $deletedRows;
}
public function getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, $doneFlags, $doneFlagValues)
{
$bindSQL = array($idSite,
$dateStartIso,
$dateEndIso,
$period,
);
$timeStampWhere = '';
if ($minDatetimeIsoArchiveProcessedUTC) {
$timeStampWhere = " AND ts_archived >= ? ";
$bindSQL[] = $minDatetimeIsoArchiveProcessedUTC;
}
$sqlWhereArchiveName = self::getNameCondition($doneFlags, $doneFlagValues);
$sqlQuery = "SELECT idarchive, value, name, date1 as startDate FROM $numericTable
WHERE idsite = ?
AND date1 = ?
AND date2 = ?
AND period = ?
AND ( ($sqlWhereArchiveName)
OR name = '" . ArchiveSelector::NB_VISITS_RECORD_LOOKED_UP . "'
OR name = '" . ArchiveSelector::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "')
$timeStampWhere
ORDER BY idarchive DESC";
$results = Db::fetchAll($sqlQuery, $bindSQL);
return $results;
}
public function createArchiveTable($tableName, $tableNamePrefix)
{
$db = Db::get();
$sql = DbHelper::getTableCreateSql($tableNamePrefix);
// replace table name template by real name
$tableNamePrefix = Common::prefixTable($tableNamePrefix);
$sql = str_replace($tableNamePrefix, $tableName, $sql);
try {
$db->query($sql);
} catch (Exception $e) {
// accept mysql error 1050: table already exists, throw otherwise
if (!$db->isErrNo($e, '1050')) {
throw $e;
}
}
try {
if (ArchiveTableCreator::NUMERIC_TABLE === ArchiveTableCreator::getTypeFromTableName($tableName)) {
$sequence = new Sequence($tableName);
$sequence->create();
}
} catch (Exception $e) {
}
}
public function allocateNewArchiveId($numericTable)
{
$sequence = new Sequence($numericTable);
try {
$idarchive = $sequence->getNextId();
} catch (Exception $e) {
// edge case: sequence was not found, create it now
$sequence->create();
$idarchive = $sequence->getNextId();
}
return $idarchive;
}
public function deletePreviousArchiveStatus($numericTable, $archiveId, $doneFlag)
{
$tableWithoutLeadingPrefix = $numericTable;
$lenNumericTableWithoutPrefix = strlen('archive_numeric_MM_YYYY');
if (strlen($numericTable) >= $lenNumericTableWithoutPrefix) {
$tableWithoutLeadingPrefix = substr($numericTable, strlen($numericTable) - $lenNumericTableWithoutPrefix);
// we need to make sure lock name is less than 64 characters see https://github.com/piwik/piwik/issues/9131
}
$dbLockName = "rmPrevArchiveStatus.$tableWithoutLeadingPrefix.$archiveId";
// without advisory lock here, the DELETE would acquire Exclusive Lock
$this->acquireArchiveTableLock($dbLockName);
Db::query("DELETE FROM $numericTable WHERE idarchive = ? AND (name = '" . $doneFlag . "')",
array($archiveId)
);
$this->releaseArchiveTableLock($dbLockName);
}
public function insertRecord($tableName, $fields, $record, $name, $value)
{
// duplicate idarchives are Ignored, see https://github.com/piwik/piwik/issues/987
$query = "INSERT IGNORE INTO " . $tableName . " (" . implode(", ", $fields) . ")
VALUES (?,?,?,?,?,?,?,?) ON DUPLICATE KEY UPDATE " . end($fields) . " = ?";
$bindSql = $record;
$bindSql[] = $name;
$bindSql[] = $value;
$bindSql[] = $value;
Db::query($query, $bindSql);
return true;
}
/**
* Returns the site IDs for invalidated archives in an archive table.
*
* @param string $numericTable The numeric table to search through.
* @return int[]
*/
public function getSitesWithInvalidatedArchive($numericTable)
{
$rows = Db::fetchAll("SELECT DISTINCT idsite FROM `$numericTable` WHERE name LIKE 'done%' AND value = " . ArchiveWriter::DONE_INVALIDATED);
$result = array();
foreach ($rows as $row) {
$result[] = $row['idsite'];
}
return $result;
}
/**
* Returns the SQL condition used to find successfully completed archives that
* this instance is querying for.
*/
private static function getNameCondition($doneFlags, $possibleValues)
{
$allDoneFlags = "'" . implode("','", $doneFlags) . "'";
// create the SQL to find archives that are DONE
return "((name IN ($allDoneFlags)) AND (value IN (" . implode(',', $possibleValues) . ")))";
}
protected function acquireArchiveTableLock($dbLockName)
{
if (Db::getDbLock($dbLockName, $maxRetries = 30) === false) {
throw new Exception("Cannot get named lock $dbLockName.");
}
}
protected function releaseArchiveTableLock($dbLockName)
{
Db::releaseDbLock($dbLockName);
}
}

View File

@ -0,0 +1,416 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Piwik\Common;
use Piwik\Container\StaticContainer;
use Piwik\Db;
use Piwik\Plugin\Dimension\DimensionMetadataProvider;
use Piwik\Plugin\LogTablesProvider;
/**
* DAO that queries log tables.
*/
class RawLogDao
{
const DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME = 'tmp_log_actions_to_keep';
/**
* @var DimensionMetadataProvider
*/
private $dimensionMetadataProvider;
/**
* @var LogTablesProvider
*/
private $logTablesProvider;
public function __construct(DimensionMetadataProvider $provider = null, LogTablesProvider $logTablesProvider = null)
{
$this->dimensionMetadataProvider = $provider ?: StaticContainer::get('Piwik\Plugin\Dimension\DimensionMetadataProvider');
$this->logTablesProvider = $logTablesProvider ?: StaticContainer::get('Piwik\Plugin\LogTablesProvider');
}
/**
* @param array $values
* @param string $idVisit
*/
public function updateVisits(array $values, $idVisit)
{
$sql = "UPDATE " . Common::prefixTable('log_visit')
. " SET " . $this->getColumnSetExpressions(array_keys($values))
. " WHERE idvisit = ?";
$this->update($sql, $values, $idVisit);
}
/**
* @param array $values
* @param string $idVisit
*/
public function updateConversions(array $values, $idVisit)
{
$sql = "UPDATE " . Common::prefixTable('log_conversion')
. " SET " . $this->getColumnSetExpressions(array_keys($values))
. " WHERE idvisit = ?";
$this->update($sql, $values, $idVisit);
}
/**
* @param string $from
* @param string $to
* @return int
*/
public function countVisitsWithDatesLimit($from, $to)
{
$sql = "SELECT COUNT(*) AS num_rows"
. " FROM " . Common::prefixTable('log_visit')
. " WHERE visit_last_action_time >= ? AND visit_last_action_time < ?";
$bind = array($from, $to);
return (int) Db::fetchOne($sql, $bind);
}
/**
* Iterates over logs in a log table in chunks. Parameters to this function are as backend agnostic
* as possible w/o dramatically increasing code complexity.
*
* @param string $logTable The log table name. Unprefixed, eg, `log_visit`.
* @param array[] $conditions An array describing the conditions logs must match in the query. Translates to
* the WHERE part of a SELECT statement. Each element must contain three elements:
*
* * the column name
* * the operator (ie, '=', '<>', '<', etc.)
* * the operand (ie, a value)
*
* The elements are AND-ed together.
*
* Example:
*
* ```
* array(
* array('visit_first_action_time', '>=', ...),
* array('visit_first_action_time', '<', ...)
* )
* ```
* @param int $iterationStep The number of rows to query at a time.
* @param callable $callback The callback that processes each chunk of rows.
*/
public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback)
{
$idField = $this->getIdFieldForLogTable($logTable);
list($query, $bind) = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep);
$lastId = 0;
do {
$rows = Db::fetchAll($query, array_merge(array($lastId), $bind));
if (!empty($rows)) {
$lastId = $rows[count($rows) - 1][$idField];
$callback($rows);
}
} while (count($rows) == $iterationStep);
}
/**
* Deletes conversions for the supplied visit IDs from log_conversion. This method does not cascade, so
* conversion items will not be deleted.
*
* @param int[] $visitIds
* @return int The number of deleted rows.
*/
public function deleteFromLogTable($tableName, $visitIds)
{
$sql = "DELETE FROM `" . Common::prefixTable($tableName) . "` WHERE idvisit IN "
. $this->getInFieldExpressionWithInts($visitIds);
$statement = Db::query($sql);
return $statement->rowCount();
}
/**
* Deletes conversion items for the supplied visit IDs from log_conversion_item.
*
* @param int[] $visitIds
* @return int The number of deleted rows.
*/
public function deleteConversionItems($visitIds)
{
$sql = "DELETE FROM `" . Common::prefixTable('log_conversion_item') . "` WHERE idvisit IN "
. $this->getInFieldExpressionWithInts($visitIds);
$statement = Db::query($sql);
return $statement->rowCount();
}
/**
* Deletes all unused entries from the log_action table. This method uses a temporary table to store used
* actions, and then deletes rows from log_action that are not in this temporary table.
*
* Table locking is required to avoid concurrency issues.
*
* @throws \Exception If table locking permission is not granted to the current MySQL user.
*/
public function deleteUnusedLogActions()
{
if (!Db::isLockPrivilegeGranted()) {
throw new \Exception("RawLogDao.deleteUnusedLogActions() requires table locking permission in order to complete without error.");
}
// get current max ID in log tables w/ idaction references.
$maxIds = $this->getMaxIdsInLogTables();
$this->createTempTableForStoringUsedActions();
// do large insert (inserting everything before maxIds) w/o locking tables...
$this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true);
// ... then do small insert w/ locked tables to minimize the amount of time tables are locked.
$this->lockLogTables();
$this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false);
// delete before unlocking tables so there's no chance a new log row that references an
// unused action will be inserted.
$this->deleteUnusedActions();
Db::unlockAllTables();
$this->dropTempTableForStoringUsedActions();
}
/**
* Returns the list of the website IDs that received some visits between the specified timestamp.
*
* @param string $fromDateTime
* @param string $toDateTime
* @return bool true if there are visits for this site between the given timeframe, false if not
*/
public function hasSiteVisitsBetweenTimeframe($fromDateTime, $toDateTime, $idSite)
{
$sites = Db::fetchOne("SELECT 1
FROM " . Common::prefixTable('log_visit') . "
WHERE idsite = ?
AND visit_last_action_time > ?
AND visit_last_action_time < ?
LIMIT 1", array($idSite, $fromDateTime, $toDateTime));
return (bool) $sites;
}
/**
* @param array $columnsToSet
* @return string
*/
protected function getColumnSetExpressions(array $columnsToSet)
{
$columnsToSet = array_map(
function ($column) {
return $column . ' = ?';
},
$columnsToSet
);
return implode(', ', $columnsToSet);
}
/**
* @param array $values
* @param $idVisit
* @param $sql
* @return \Zend_Db_Statement
* @throws \Exception
*/
protected function update($sql, array $values, $idVisit)
{
return Db::query($sql, array_merge(array_values($values), array($idVisit)));
}
protected function getIdFieldForLogTable($logTable)
{
$idColumns = $this->getTableIdColumns();
if (isset($idColumns[$logTable])) {
return $idColumns[$logTable];
}
throw new \InvalidArgumentException("Unknown log table '$logTable'.");
}
// TODO: instead of creating a log query like this, we should re-use segments. to do this, however, there must be a 1-1
// mapping for dimensions => segments, and each dimension should automatically have a segment.
private function createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep)
{
$bind = array();
$sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE $idField > ?";
foreach ($conditions as $condition) {
list($column, $operator, $value) = $condition;
if (is_array($value)) {
$sql .= " AND $column IN (" . Common::getSqlStringFieldsArray($value) . ")";
$bind = array_merge($bind, $value);
} else {
$sql .= " AND $column $operator ?";
$bind[] = $value;
}
}
$sql .= " ORDER BY $idField ASC LIMIT " . (int)$iterationStep;
return array($sql, $bind);
}
private function getInFieldExpressionWithInts($idVisits)
{
$sql = "(";
$isFirst = true;
foreach ($idVisits as $idVisit) {
if ($isFirst) {
$isFirst = false;
} else {
$sql .= ', ';
}
$sql .= (int)$idVisit;
}
$sql .= ")";
return $sql;
}
protected function getMaxIdsInLogTables()
{
$idColumns = $this->getTableIdColumns();
$tables = array_keys($idColumns);
$result = array();
foreach ($tables as $table) {
$idCol = $idColumns[$table];
$result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
}
return $result;
}
private function createTempTableForStoringUsedActions()
{
$sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME) . " (
idaction INT(11),
PRIMARY KEY (idaction)
)";
Db::query($sql);
}
private function dropTempTableForStoringUsedActions()
{
$sql = "DROP TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
Db::query($sql);
}
// protected for testing purposes
protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000)
{
$tempTableName = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
$idColumns = $this->getTableIdColumns();
foreach ($this->dimensionMetadataProvider->getActionReferenceColumnsByTable() as $table => $columns) {
$idCol = $idColumns[$table];
// Create select query for requesting ALL needed fields at once
$sql = "SELECT " . implode(',' ,$columns) . " FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?";
if ($olderThan) {
// Why start on zero? When running for a couple of months, this will generate about 10000+ queries with zero result. Use the lowest value instead.... saves a LOT of waiting time!
$start = (int) Db::fetchOne("SELECT MIN($idCol) FROM " . Common::prefixTable($table));;
$finish = $maxIds[$table];
} else {
$start = $maxIds[$table];
$finish = (int) Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
}
// Borrowed from Db::segmentedFetchAll
// Request records per $insertIntoTempIterationStep amount
// Loop over the result set, mapping all numeric fields in a single insert query
// Insert query would be: INSERT IGNORE INTO [temp_table] VALUES (X),(Y),(Z) depending on the amount of fields requested per row
for ($i = $start; $i <= $finish; $i += $insertIntoTempIterationStep) {
$currentParams = array($i, $i + $insertIntoTempIterationStep);
$result = Db::fetchAll($sql, $currentParams);
// Now we loop over the result set of max $insertIntoTempIterationStep rows and create insert queries
$keepValues = [];
foreach ($result as $row) {
$keepValues = array_merge($keepValues, array_filter(array_values($row), "is_numeric"));
if (count($keepValues) >= 1000) {
$insert = 'INSERT IGNORE INTO ' . $tempTableName .' VALUES (';
$insert .= implode('),(', $keepValues);
$insert .= ')';
Db::exec($insert);
$keepValues = [];
}
}
$insert = 'INSERT IGNORE INTO ' . $tempTableName .' VALUES (';
$insert .= implode('),(', $keepValues);
$insert .= ')';
Db::exec($insert);
}
}
}
private function lockLogTables()
{
$tables = $this->getTableIdColumns();
unset($tables['log_action']); // we write lock it
$tableNames = array_keys($tables);
$readLocks = array();
foreach ($tableNames as $tableName) {
$readLocks[] = Common::prefixTable($tableName);
}
Db::lockTables(
$readLocks,
$writeLocks = Common::prefixTables('log_action')
);
}
private function deleteUnusedActions()
{
list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
$deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable
FROM $logActionTable
LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction
WHERE tmp.idaction IS NULL";
Db::query($deleteSql);
}
protected function getTableIdColumns()
{
$columns = array();
foreach ($this->logTablesProvider->getAllLogTables() as $logTable) {
$idColumn = $logTable->getIdColumn();
if (!empty($idColumn)) {
$columns[$logTable->getName()] = $idColumn;
}
}
return $columns;
}
}

View File

@ -0,0 +1,56 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\DataAccess;
use Piwik\Db;
/**
* Data Access Object that can be used to get metadata information about
* the MySQL tables Piwik uses.
*/
class TableMetadata
{
/**
* Returns the list of column names for a table.
*
* @param string $table Prefixed table name.
* @return string[] List of column names..
*/
public function getColumns($table)
{
$table = str_replace("`", "", $table);
$columns = Db::fetchAll("SHOW COLUMNS FROM `" . $table . "`");
$columnNames = array();
foreach ($columns as $column) {
$columnNames[] = $column['Field'];
}
return $columnNames;
}
/**
* Returns the list of idaction columns in a table. A column is
* assumed to be an idaction reference if it has `"idaction"` in its
* name (eg, `"idaction_url"` or `"idaction_content_name"`.
*
* @param string $table Prefixed table name.
* @return string[]
*/
public function getIdActionColumnNames($table)
{
$columns = $this->getColumns($table);
$columns = array_filter($columns, function ($columnName) {
return strpos($columnName, 'idaction') !== false;
});
return array_values($columns);
}
}