PDF rausgenommen
This commit is contained in:
34
msd2/tracking/piwik/core/DataAccess/Actions.php
Normal file
34
msd2/tracking/piwik/core/DataAccess/Actions.php
Normal file
@ -0,0 +1,34 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*/
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Piwik\Db;
|
||||
use Piwik\Common;
|
||||
|
||||
/**
|
||||
* Data Access Object for operations dealing with the log_action table.
|
||||
*/
|
||||
class Actions
|
||||
{
|
||||
/**
|
||||
* Removes a list of actions from the log_action table by ID.
|
||||
*
|
||||
* @param int[] $idActions
|
||||
*/
|
||||
public function delete($idActions)
|
||||
{
|
||||
foreach ($idActions as &$id) {
|
||||
$id = (int)$id;
|
||||
}
|
||||
|
||||
$table = Common::prefixTable('log_action');
|
||||
|
||||
$sql = "DELETE FROM $table WHERE idaction IN (" . implode(",", $idActions) . ")";
|
||||
Db::query($sql);
|
||||
}
|
||||
}
|
371
msd2/tracking/piwik/core/DataAccess/ArchiveSelector.php
Normal file
371
msd2/tracking/piwik/core/DataAccess/ArchiveSelector.php
Normal file
@ -0,0 +1,371 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Exception;
|
||||
use Piwik\Archive;
|
||||
use Piwik\Archive\Chunk;
|
||||
use Piwik\ArchiveProcessor;
|
||||
use Piwik\ArchiveProcessor\Rules;
|
||||
use Piwik\Common;
|
||||
use Piwik\Date;
|
||||
use Piwik\Db;
|
||||
use Piwik\Period;
|
||||
use Piwik\Period\Range;
|
||||
use Piwik\Segment;
|
||||
|
||||
/**
|
||||
* Data Access object used to query archives
|
||||
*
|
||||
* A record in the Database for a given report is defined by
|
||||
* - idarchive = unique ID that is associated to all the data of this archive (idsite+period+date)
|
||||
* - idsite = the ID of the website
|
||||
* - date1 = starting day of the period
|
||||
* - date2 = ending day of the period
|
||||
* - period = integer that defines the period (day/week/etc.). @see period::getId()
|
||||
* - ts_archived = timestamp when the archive was processed (UTC)
|
||||
* - name = the name of the report (ex: uniq_visitors or search_keywords_by_search_engines)
|
||||
* - value = the actual data (a numeric value, or a blob of compressed serialized data)
|
||||
*
|
||||
*/
|
||||
class ArchiveSelector
|
||||
{
|
||||
const NB_VISITS_RECORD_LOOKED_UP = "nb_visits";
|
||||
|
||||
const NB_VISITS_CONVERTED_RECORD_LOOKED_UP = "nb_visits_converted";
|
||||
|
||||
private static function getModel()
|
||||
{
|
||||
return new Model();
|
||||
}
|
||||
|
||||
public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params, $minDatetimeArchiveProcessedUTC)
|
||||
{
|
||||
$idSite = $params->getSite()->getId();
|
||||
$period = $params->getPeriod()->getId();
|
||||
$dateStart = $params->getPeriod()->getDateStart();
|
||||
$dateStartIso = $dateStart->toString('Y-m-d');
|
||||
$dateEndIso = $params->getPeriod()->getDateEnd()->toString('Y-m-d');
|
||||
|
||||
$numericTable = ArchiveTableCreator::getNumericTable($dateStart);
|
||||
|
||||
$minDatetimeIsoArchiveProcessedUTC = null;
|
||||
if ($minDatetimeArchiveProcessedUTC) {
|
||||
$minDatetimeIsoArchiveProcessedUTC = Date::factory($minDatetimeArchiveProcessedUTC)->getDatetime();
|
||||
}
|
||||
|
||||
$requestedPlugin = $params->getRequestedPlugin();
|
||||
$segment = $params->getSegment();
|
||||
$plugins = array("VisitsSummary", $requestedPlugin);
|
||||
|
||||
$doneFlags = Rules::getDoneFlags($plugins, $segment);
|
||||
$doneFlagValues = Rules::getSelectableDoneFlagValues();
|
||||
|
||||
$results = self::getModel()->getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, $doneFlags, $doneFlagValues);
|
||||
|
||||
if (empty($results)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$idArchive = self::getMostRecentIdArchiveFromResults($segment, $requestedPlugin, $results);
|
||||
|
||||
$idArchiveVisitsSummary = self::getMostRecentIdArchiveFromResults($segment, "VisitsSummary", $results);
|
||||
|
||||
list($visits, $visitsConverted) = self::getVisitsMetricsFromResults($idArchive, $idArchiveVisitsSummary, $results);
|
||||
|
||||
if (false === $visits && false === $idArchive) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return array($idArchive, $visits, $visitsConverted);
|
||||
}
|
||||
|
||||
protected static function getVisitsMetricsFromResults($idArchive, $idArchiveVisitsSummary, $results)
|
||||
{
|
||||
$visits = $visitsConverted = false;
|
||||
$archiveWithVisitsMetricsWasFound = ($idArchiveVisitsSummary !== false);
|
||||
|
||||
if ($archiveWithVisitsMetricsWasFound) {
|
||||
$visits = $visitsConverted = 0;
|
||||
}
|
||||
|
||||
foreach ($results as $result) {
|
||||
if (in_array($result['idarchive'], array($idArchive, $idArchiveVisitsSummary))) {
|
||||
$value = (int)$result['value'];
|
||||
if (empty($visits)
|
||||
&& $result['name'] == self::NB_VISITS_RECORD_LOOKED_UP
|
||||
) {
|
||||
$visits = $value;
|
||||
}
|
||||
if (empty($visitsConverted)
|
||||
&& $result['name'] == self::NB_VISITS_CONVERTED_RECORD_LOOKED_UP
|
||||
) {
|
||||
$visitsConverted = $value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return array($visits, $visitsConverted);
|
||||
}
|
||||
|
||||
protected static function getMostRecentIdArchiveFromResults(Segment $segment, $requestedPlugin, $results)
|
||||
{
|
||||
$idArchive = false;
|
||||
$namesRequestedPlugin = Rules::getDoneFlags(array($requestedPlugin), $segment);
|
||||
|
||||
foreach ($results as $result) {
|
||||
if ($idArchive === false
|
||||
&& in_array($result['name'], $namesRequestedPlugin)
|
||||
) {
|
||||
$idArchive = $result['idarchive'];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $idArchive;
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries and returns archive IDs for a set of sites, periods, and a segment.
|
||||
*
|
||||
* @param array $siteIds
|
||||
* @param array $periods
|
||||
* @param Segment $segment
|
||||
* @param array $plugins List of plugin names for which data is being requested.
|
||||
* @return array Archive IDs are grouped by archive name and period range, ie,
|
||||
* array(
|
||||
* 'VisitsSummary.done' => array(
|
||||
* '2010-01-01' => array(1,2,3)
|
||||
* )
|
||||
* )
|
||||
* @throws
|
||||
*/
|
||||
public static function getArchiveIds($siteIds, $periods, $segment, $plugins)
|
||||
{
|
||||
if (empty($siteIds)) {
|
||||
throw new \Exception("Website IDs could not be read from the request, ie. idSite=");
|
||||
}
|
||||
|
||||
foreach ($siteIds as $index => $siteId) {
|
||||
$siteIds[$index] = (int) $siteId;
|
||||
}
|
||||
|
||||
$getArchiveIdsSql = "SELECT idsite, name, date1, date2, MAX(idarchive) as idarchive
|
||||
FROM %s
|
||||
WHERE idsite IN (" . implode(',', $siteIds) . ")
|
||||
AND " . self::getNameCondition($plugins, $segment) . "
|
||||
AND %s
|
||||
GROUP BY idsite, date1, date2, name";
|
||||
|
||||
$monthToPeriods = array();
|
||||
foreach ($periods as $period) {
|
||||
/** @var Period $period */
|
||||
if ($period->getDateStart()->isLater(Date::now()->addDay(2))) {
|
||||
continue; // avoid creating any archive tables in the future
|
||||
}
|
||||
$table = ArchiveTableCreator::getNumericTable($period->getDateStart());
|
||||
$monthToPeriods[$table][] = $period;
|
||||
}
|
||||
|
||||
// for every month within the archive query, select from numeric table
|
||||
$result = array();
|
||||
foreach ($monthToPeriods as $table => $periods) {
|
||||
$firstPeriod = reset($periods);
|
||||
|
||||
$bind = array();
|
||||
|
||||
if ($firstPeriod instanceof Range) {
|
||||
$dateCondition = "date1 = ? AND date2 = ?";
|
||||
$bind[] = $firstPeriod->getDateStart()->toString('Y-m-d');
|
||||
$bind[] = $firstPeriod->getDateEnd()->toString('Y-m-d');
|
||||
} else {
|
||||
// we assume there is no range date in $periods
|
||||
$dateCondition = '(';
|
||||
|
||||
foreach ($periods as $period) {
|
||||
if (strlen($dateCondition) > 1) {
|
||||
$dateCondition .= ' OR ';
|
||||
}
|
||||
|
||||
$dateCondition .= "(period = ? AND date1 = ? AND date2 = ?)";
|
||||
$bind[] = $period->getId();
|
||||
$bind[] = $period->getDateStart()->toString('Y-m-d');
|
||||
$bind[] = $period->getDateEnd()->toString('Y-m-d');
|
||||
}
|
||||
|
||||
$dateCondition .= ')';
|
||||
}
|
||||
|
||||
$sql = sprintf($getArchiveIdsSql, $table, $dateCondition);
|
||||
|
||||
$archiveIds = Db::fetchAll($sql, $bind);
|
||||
|
||||
// get the archive IDs
|
||||
foreach ($archiveIds as $row) {
|
||||
//FIXMEA duplicate with Archive.php
|
||||
$dateStr = $row['date1'] . ',' . $row['date2'];
|
||||
|
||||
$result[$row['name']][$dateStr][] = $row['idarchive'];
|
||||
}
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries and returns archive data using a set of archive IDs.
|
||||
*
|
||||
* @param array $archiveIds The IDs of the archives to get data from.
|
||||
* @param array $recordNames The names of the data to retrieve (ie, nb_visits, nb_actions, etc.).
|
||||
* Note: You CANNOT pass multiple recordnames if $loadAllSubtables=true.
|
||||
* @param string $archiveDataType The archive data type (either, 'blob' or 'numeric').
|
||||
* @param int|null|string $idSubtable null if the root blob should be loaded, an integer if a subtable should be
|
||||
* loaded and 'all' if all subtables should be loaded.
|
||||
* @throws Exception
|
||||
* @return array
|
||||
*/
|
||||
public static function getArchiveData($archiveIds, $recordNames, $archiveDataType, $idSubtable)
|
||||
{
|
||||
$chunk = new Chunk();
|
||||
|
||||
// create the SQL to select archive data
|
||||
$loadAllSubtables = $idSubtable == Archive::ID_SUBTABLE_LOAD_ALL_SUBTABLES;
|
||||
if ($loadAllSubtables) {
|
||||
$name = reset($recordNames);
|
||||
|
||||
// select blobs w/ name like "$name_[0-9]+" w/o using RLIKE
|
||||
$nameEnd = strlen($name) + 1;
|
||||
$nameEndAppendix = $nameEnd + 1;
|
||||
$appendix = $chunk->getAppendix();
|
||||
$lenAppendix = strlen($appendix);
|
||||
|
||||
$checkForChunkBlob = "SUBSTRING(name, $nameEnd, $lenAppendix) = '$appendix'";
|
||||
$checkForSubtableId = "(SUBSTRING(name, $nameEndAppendix, 1) >= '0'
|
||||
AND SUBSTRING(name, $nameEndAppendix, 1) <= '9')";
|
||||
|
||||
$whereNameIs = "(name = ? OR (name LIKE ? AND ( $checkForChunkBlob OR $checkForSubtableId ) ))";
|
||||
$bind = array($name, $name . '%');
|
||||
} else {
|
||||
if ($idSubtable === null) {
|
||||
// select root table or specific record names
|
||||
$bind = array_values($recordNames);
|
||||
} else {
|
||||
// select a subtable id
|
||||
$bind = array();
|
||||
foreach ($recordNames as $recordName) {
|
||||
// to be backwards compatibe we need to look for the exact idSubtable blob and for the chunk
|
||||
// that stores the subtables (a chunk stores many blobs in one blob)
|
||||
$bind[] = $chunk->getRecordNameForTableId($recordName, $idSubtable);
|
||||
$bind[] = self::appendIdSubtable($recordName, $idSubtable);
|
||||
}
|
||||
}
|
||||
|
||||
$inNames = Common::getSqlStringFieldsArray($bind);
|
||||
$whereNameIs = "name IN ($inNames)";
|
||||
}
|
||||
|
||||
$getValuesSql = "SELECT value, name, idsite, date1, date2, ts_archived
|
||||
FROM %s
|
||||
WHERE idarchive IN (%s)
|
||||
AND " . $whereNameIs;
|
||||
|
||||
// get data from every table we're querying
|
||||
$rows = array();
|
||||
foreach ($archiveIds as $period => $ids) {
|
||||
if (empty($ids)) {
|
||||
throw new Exception("Unexpected: id archive not found for period '$period' '");
|
||||
}
|
||||
|
||||
// $period = "2009-01-04,2009-01-04",
|
||||
$date = Date::factory(substr($period, 0, 10));
|
||||
|
||||
$isNumeric = $archiveDataType == 'numeric';
|
||||
if ($isNumeric) {
|
||||
$table = ArchiveTableCreator::getNumericTable($date);
|
||||
} else {
|
||||
$table = ArchiveTableCreator::getBlobTable($date);
|
||||
}
|
||||
|
||||
$sql = sprintf($getValuesSql, $table, implode(',', $ids));
|
||||
$dataRows = Db::fetchAll($sql, $bind);
|
||||
|
||||
foreach ($dataRows as $row) {
|
||||
if ($isNumeric) {
|
||||
$rows[] = $row;
|
||||
} else {
|
||||
$row['value'] = self::uncompress($row['value']);
|
||||
|
||||
if ($chunk->isRecordNameAChunk($row['name'])) {
|
||||
self::moveChunkRowToRows($rows, $row, $chunk, $loadAllSubtables, $idSubtable);
|
||||
} else {
|
||||
$rows[] = $row;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $rows;
|
||||
}
|
||||
|
||||
private static function moveChunkRowToRows(&$rows, $row, Chunk $chunk, $loadAllSubtables, $idSubtable)
|
||||
{
|
||||
// $blobs = array([subtableID] = [blob of subtableId])
|
||||
$blobs = Common::safe_unserialize($row['value']);
|
||||
|
||||
if (!is_array($blobs)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// $rawName = eg 'PluginName_ArchiveName'
|
||||
$rawName = $chunk->getRecordNameWithoutChunkAppendix($row['name']);
|
||||
|
||||
if ($loadAllSubtables) {
|
||||
foreach ($blobs as $subtableId => $blob) {
|
||||
$row['value'] = $blob;
|
||||
$row['name'] = self::appendIdSubtable($rawName, $subtableId);
|
||||
$rows[] = $row;
|
||||
}
|
||||
} elseif (array_key_exists($idSubtable, $blobs)) {
|
||||
$row['value'] = $blobs[$idSubtable];
|
||||
$row['name'] = self::appendIdSubtable($rawName, $idSubtable);
|
||||
$rows[] = $row;
|
||||
}
|
||||
}
|
||||
|
||||
public static function appendIdSubtable($recordName, $id)
|
||||
{
|
||||
return $recordName . "_" . $id;
|
||||
}
|
||||
|
||||
private static function uncompress($data)
|
||||
{
|
||||
return @gzuncompress($data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the SQL condition used to find successfully completed archives that
|
||||
* this instance is querying for.
|
||||
*
|
||||
* @param array $plugins
|
||||
* @param Segment $segment
|
||||
* @return string
|
||||
*/
|
||||
private static function getNameCondition(array $plugins, Segment $segment)
|
||||
{
|
||||
// the flags used to tell how the archiving process for a specific archive was completed,
|
||||
// if it was completed
|
||||
$doneFlags = Rules::getDoneFlags($plugins, $segment);
|
||||
$allDoneFlags = "'" . implode("','", $doneFlags) . "'";
|
||||
|
||||
$possibleValues = Rules::getSelectableDoneFlagValues();
|
||||
|
||||
// create the SQL to find archives that are DONE
|
||||
return "((name IN ($allDoneFlags)) AND (value IN (" . implode(',', $possibleValues) . ")))";
|
||||
}
|
||||
}
|
123
msd2/tracking/piwik/core/DataAccess/ArchiveTableCreator.php
Normal file
123
msd2/tracking/piwik/core/DataAccess/ArchiveTableCreator.php
Normal file
@ -0,0 +1,123 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Piwik\Common;
|
||||
use Piwik\Date;
|
||||
use Piwik\DbHelper;
|
||||
|
||||
class ArchiveTableCreator
|
||||
{
|
||||
const NUMERIC_TABLE = "numeric";
|
||||
const BLOB_TABLE = "blob";
|
||||
|
||||
public static $tablesAlreadyInstalled = null;
|
||||
|
||||
public static function getNumericTable(Date $date)
|
||||
{
|
||||
return self::getTable($date, self::NUMERIC_TABLE);
|
||||
}
|
||||
|
||||
public static function getBlobTable(Date $date)
|
||||
{
|
||||
return self::getTable($date, self::BLOB_TABLE);
|
||||
}
|
||||
|
||||
protected static function getTable(Date $date, $type)
|
||||
{
|
||||
$tableNamePrefix = "archive_" . $type;
|
||||
$tableName = $tableNamePrefix . "_" . self::getTableMonthFromDate($date);
|
||||
$tableName = Common::prefixTable($tableName);
|
||||
|
||||
self::createArchiveTablesIfAbsent($tableName, $tableNamePrefix);
|
||||
|
||||
return $tableName;
|
||||
}
|
||||
|
||||
protected static function createArchiveTablesIfAbsent($tableName, $tableNamePrefix)
|
||||
{
|
||||
if (is_null(self::$tablesAlreadyInstalled)) {
|
||||
self::refreshTableList();
|
||||
}
|
||||
|
||||
if (!in_array($tableName, self::$tablesAlreadyInstalled)) {
|
||||
self::getModel()->createArchiveTable($tableName, $tableNamePrefix);
|
||||
self::$tablesAlreadyInstalled[] = $tableName;
|
||||
}
|
||||
}
|
||||
|
||||
private static function getModel()
|
||||
{
|
||||
return new Model();
|
||||
}
|
||||
|
||||
public static function clear()
|
||||
{
|
||||
self::$tablesAlreadyInstalled = null;
|
||||
}
|
||||
|
||||
public static function refreshTableList($forceReload = false)
|
||||
{
|
||||
self::$tablesAlreadyInstalled = DbHelper::getTablesInstalled($forceReload);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all table names archive_*
|
||||
*
|
||||
* @param string $type The type of table to return. Either `self::NUMERIC_TABLE` or `self::BLOB_TABLE`.
|
||||
* @return array
|
||||
*/
|
||||
public static function getTablesArchivesInstalled($type = null)
|
||||
{
|
||||
if (is_null(self::$tablesAlreadyInstalled)) {
|
||||
self::refreshTableList();
|
||||
}
|
||||
|
||||
if (empty($type)) {
|
||||
$tableMatchRegex = '/archive_(numeric|blob)_/';
|
||||
} else {
|
||||
$tableMatchRegex = '/archive_' . preg_quote($type) . '_/';
|
||||
}
|
||||
|
||||
$archiveTables = array();
|
||||
foreach (self::$tablesAlreadyInstalled as $table) {
|
||||
if (preg_match($tableMatchRegex, $table)) {
|
||||
$archiveTables[] = $table;
|
||||
}
|
||||
}
|
||||
return $archiveTables;
|
||||
}
|
||||
|
||||
public static function getDateFromTableName($tableName)
|
||||
{
|
||||
$tableName = Common::unprefixTable($tableName);
|
||||
$date = str_replace(array('archive_numeric_', 'archive_blob_'), '', $tableName);
|
||||
|
||||
return $date;
|
||||
}
|
||||
|
||||
public static function getTableMonthFromDate(Date $date)
|
||||
{
|
||||
return $date->toString('Y_m');
|
||||
}
|
||||
|
||||
public static function getTypeFromTableName($tableName)
|
||||
{
|
||||
if (strpos($tableName, 'archive_numeric_') !== false) {
|
||||
return self::NUMERIC_TABLE;
|
||||
}
|
||||
|
||||
if (strpos($tableName, 'archive_blob_') !== false) {
|
||||
return self::BLOB_TABLE;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
90
msd2/tracking/piwik/core/DataAccess/ArchiveTableDao.php
Normal file
90
msd2/tracking/piwik/core/DataAccess/ArchiveTableDao.php
Normal file
@ -0,0 +1,90 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*/
|
||||
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Piwik\Common;
|
||||
use Piwik\Config;
|
||||
use Piwik\Db;
|
||||
|
||||
/**
|
||||
* Data Access class for querying numeric & blob archive tables.
|
||||
*/
|
||||
class ArchiveTableDao
|
||||
{
|
||||
/**
|
||||
* Analyzes numeric & blob tables for a single table date (ie, `'2015_01'`) and returns
|
||||
* statistics including:
|
||||
*
|
||||
* - number of archives present
|
||||
* - number of invalidated archives
|
||||
* - number of temporary archives
|
||||
* - number of error archives
|
||||
* - number of segment archives
|
||||
* - number of numeric rows
|
||||
* - number of blob rows
|
||||
*
|
||||
* @param string $tableDate ie `'2015_01'`
|
||||
* @return array
|
||||
*/
|
||||
public function getArchiveTableAnalysis($tableDate)
|
||||
{
|
||||
$numericQueryEmptyRow = array(
|
||||
'count_archives' => '-',
|
||||
'count_invalidated_archives' => '-',
|
||||
'count_temporary_archives' => '-',
|
||||
'count_error_archives' => '-',
|
||||
'count_segment_archives' => '-',
|
||||
'count_numeric_rows' => '-',
|
||||
);
|
||||
|
||||
$tableDate = str_replace("`", "", $tableDate); // for sanity
|
||||
|
||||
$numericTable = Common::prefixTable("archive_numeric_$tableDate");
|
||||
$blobTable = Common::prefixTable("archive_blob_$tableDate");
|
||||
|
||||
// query numeric table
|
||||
$sql = "SELECT CONCAT_WS('.', idsite, date1, date2, period) AS label,
|
||||
SUM(CASE WHEN name LIKE 'done%' THEN 1 ELSE 0 END) AS count_archives,
|
||||
SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_invalidated_archives,
|
||||
SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_temporary_archives,
|
||||
SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_error_archives,
|
||||
SUM(CASE WHEN name LIKE 'done%' AND CHAR_LENGTH(name) > 32 THEN 1 ELSE 0 END) AS count_segment_archives,
|
||||
SUM(CASE WHEN name NOT LIKE 'done%' THEN 1 ELSE 0 END) AS count_numeric_rows,
|
||||
0 AS count_blob_rows
|
||||
FROM `$numericTable`
|
||||
GROUP BY idsite, date1, date2, period";
|
||||
|
||||
$rows = Db::fetchAll($sql, array(ArchiveWriter::DONE_INVALIDATED, ArchiveWriter::DONE_OK_TEMPORARY,
|
||||
ArchiveWriter::DONE_ERROR));
|
||||
|
||||
// index result
|
||||
$result = array();
|
||||
foreach ($rows as $row) {
|
||||
$result[$row['label']] = $row;
|
||||
}
|
||||
|
||||
// query blob table & manually merge results (no FULL OUTER JOIN in mysql)
|
||||
$sql = "SELECT CONCAT_WS('.', idsite, date1, date2, period) AS label,
|
||||
COUNT(*) AS count_blob_rows,
|
||||
SUM(OCTET_LENGTH(value)) AS sum_blob_length
|
||||
FROM `$blobTable`
|
||||
GROUP BY idsite, date1, date1, period";
|
||||
|
||||
foreach (Db::fetchAll($sql) as $blobStatsRow) {
|
||||
$label = $blobStatsRow['label'];
|
||||
if (isset($result[$label])) {
|
||||
$result[$label] = array_merge($result[$label], $blobStatsRow);
|
||||
} else {
|
||||
$result[$label] = $blobStatsRow + $numericQueryEmptyRow;
|
||||
}
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
}
|
276
msd2/tracking/piwik/core/DataAccess/ArchiveWriter.php
Normal file
276
msd2/tracking/piwik/core/DataAccess/ArchiveWriter.php
Normal file
@ -0,0 +1,276 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Exception;
|
||||
use Piwik\Archive;
|
||||
use Piwik\Archive\Chunk;
|
||||
use Piwik\ArchiveProcessor\Rules;
|
||||
use Piwik\ArchiveProcessor;
|
||||
use Piwik\Db;
|
||||
use Piwik\Db\BatchInsert;
|
||||
use Piwik\Period;
|
||||
|
||||
/**
|
||||
* This class is used to create a new Archive.
|
||||
* An Archive is a set of reports (numeric and data tables).
|
||||
* New data can be inserted in the archive with insertRecord/insertBulkRecords
|
||||
*/
|
||||
class ArchiveWriter
|
||||
{
|
||||
/**
|
||||
* Flag stored at the end of the archiving
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
const DONE_OK = 1;
|
||||
/**
|
||||
* Flag stored at the start of the archiving
|
||||
* When requesting an Archive, we make sure that non-finished archive are not considered valid
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
const DONE_ERROR = 2;
|
||||
/**
|
||||
* Flag indicates the archive is over a period that is not finished, eg. the current day, current week, etc.
|
||||
* Archives flagged will be regularly purged from the DB.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
const DONE_OK_TEMPORARY = 3;
|
||||
|
||||
/**
|
||||
* Flag indicated that archive is done but was marked as invalid later and needs to be re-processed during next archiving process
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
const DONE_INVALIDATED = 4;
|
||||
|
||||
protected $fields = array('idarchive',
|
||||
'idsite',
|
||||
'date1',
|
||||
'date2',
|
||||
'period',
|
||||
'ts_archived',
|
||||
'name',
|
||||
'value');
|
||||
|
||||
public function __construct(ArchiveProcessor\Parameters $params, $isArchiveTemporary)
|
||||
{
|
||||
$this->idArchive = false;
|
||||
$this->idSite = $params->getSite()->getId();
|
||||
$this->segment = $params->getSegment();
|
||||
$this->period = $params->getPeriod();
|
||||
|
||||
$idSites = array($this->idSite);
|
||||
$this->doneFlag = Rules::getDoneStringFlagFor($idSites, $this->segment, $this->period->getLabel(), $params->getRequestedPlugin());
|
||||
$this->isArchiveTemporary = $isArchiveTemporary;
|
||||
|
||||
$this->dateStart = $this->period->getDateStart();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $name
|
||||
* @param string|string[] $values A blob string or an array of blob strings. If an array
|
||||
* is used, the first element in the array will be inserted
|
||||
* with the `$name` name. The others will be splitted into chunks. All subtables
|
||||
* within one chunk will be serialized as an array where the index is the
|
||||
* subtableId.
|
||||
*/
|
||||
public function insertBlobRecord($name, $values)
|
||||
{
|
||||
if (is_array($values)) {
|
||||
$clean = array();
|
||||
|
||||
if (isset($values[0])) {
|
||||
// we always store the root table in a single blob for fast access
|
||||
$clean[] = array($name, $this->compress($values[0]));
|
||||
unset($values[0]);
|
||||
}
|
||||
|
||||
if (!empty($values)) {
|
||||
// we move all subtables into chunks
|
||||
$chunk = new Chunk();
|
||||
$chunks = $chunk->moveArchiveBlobsIntoChunks($name, $values);
|
||||
foreach ($chunks as $index => $subtables) {
|
||||
$clean[] = array($index, $this->compress(serialize($subtables)));
|
||||
}
|
||||
}
|
||||
|
||||
$this->insertBulkRecords($clean);
|
||||
return;
|
||||
}
|
||||
|
||||
$values = $this->compress($values);
|
||||
$this->insertRecord($name, $values);
|
||||
}
|
||||
|
||||
public function getIdArchive()
|
||||
{
|
||||
if ($this->idArchive === false) {
|
||||
throw new Exception("Must call allocateNewArchiveId() first");
|
||||
}
|
||||
|
||||
return $this->idArchive;
|
||||
}
|
||||
|
||||
public function initNewArchive()
|
||||
{
|
||||
$this->allocateNewArchiveId();
|
||||
$this->logArchiveStatusAsIncomplete();
|
||||
}
|
||||
|
||||
public function finalizeArchive()
|
||||
{
|
||||
$numericTable = $this->getTableNumeric();
|
||||
$idArchive = $this->getIdArchive();
|
||||
|
||||
$this->getModel()->deletePreviousArchiveStatus($numericTable, $idArchive, $this->doneFlag);
|
||||
|
||||
$this->logArchiveStatusAsFinal();
|
||||
}
|
||||
|
||||
protected function compress($data)
|
||||
{
|
||||
if (Db::get()->hasBlobDataType()) {
|
||||
return gzcompress($data);
|
||||
}
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
protected function allocateNewArchiveId()
|
||||
{
|
||||
$numericTable = $this->getTableNumeric();
|
||||
|
||||
$this->idArchive = $this->getModel()->allocateNewArchiveId($numericTable);
|
||||
return $this->idArchive;
|
||||
}
|
||||
|
||||
private function getModel()
|
||||
{
|
||||
return new Model();
|
||||
}
|
||||
|
||||
protected function logArchiveStatusAsIncomplete()
|
||||
{
|
||||
$this->insertRecord($this->doneFlag, self::DONE_ERROR);
|
||||
}
|
||||
|
||||
protected function logArchiveStatusAsFinal()
|
||||
{
|
||||
$status = self::DONE_OK;
|
||||
|
||||
if ($this->isArchiveTemporary) {
|
||||
$status = self::DONE_OK_TEMPORARY;
|
||||
}
|
||||
|
||||
$this->insertRecord($this->doneFlag, $status);
|
||||
}
|
||||
|
||||
protected function insertBulkRecords($records)
|
||||
{
|
||||
// Using standard plain INSERT if there is only one record to insert
|
||||
if ($DEBUG_DO_NOT_USE_BULK_INSERT = false
|
||||
|| count($records) == 1
|
||||
) {
|
||||
foreach ($records as $record) {
|
||||
$this->insertRecord($record[0], $record[1]);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
$bindSql = $this->getInsertRecordBind();
|
||||
$values = array();
|
||||
|
||||
$valueSeen = false;
|
||||
foreach ($records as $record) {
|
||||
// don't record zero
|
||||
if (empty($record[1])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$bind = $bindSql;
|
||||
$bind[] = $record[0]; // name
|
||||
$bind[] = $record[1]; // value
|
||||
$values[] = $bind;
|
||||
|
||||
$valueSeen = $record[1];
|
||||
}
|
||||
|
||||
if (empty($values)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$tableName = $this->getTableNameToInsert($valueSeen);
|
||||
$fields = $this->getInsertFields();
|
||||
|
||||
BatchInsert::tableInsertBatch($tableName, $fields, $values, $throwException = false, $charset = 'latin1');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts a record in the right table (either NUMERIC or BLOB)
|
||||
*
|
||||
* @param string $name
|
||||
* @param mixed $value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function insertRecord($name, $value)
|
||||
{
|
||||
if ($this->isRecordZero($value)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$tableName = $this->getTableNameToInsert($value);
|
||||
$fields = $this->getInsertFields();
|
||||
$record = $this->getInsertRecordBind();
|
||||
|
||||
$this->getModel()->insertRecord($tableName, $fields, $record, $name, $value);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected function getInsertRecordBind()
|
||||
{
|
||||
return array($this->getIdArchive(),
|
||||
$this->idSite,
|
||||
$this->dateStart->toString('Y-m-d'),
|
||||
$this->period->getDateEnd()->toString('Y-m-d'),
|
||||
$this->period->getId(),
|
||||
date("Y-m-d H:i:s"));
|
||||
}
|
||||
|
||||
protected function getTableNameToInsert($value)
|
||||
{
|
||||
if (is_numeric($value)) {
|
||||
return $this->getTableNumeric();
|
||||
}
|
||||
|
||||
return ArchiveTableCreator::getBlobTable($this->dateStart);
|
||||
}
|
||||
|
||||
protected function getTableNumeric()
|
||||
{
|
||||
return ArchiveTableCreator::getNumericTable($this->dateStart);
|
||||
}
|
||||
|
||||
protected function getInsertFields()
|
||||
{
|
||||
return $this->fields;
|
||||
}
|
||||
|
||||
protected function isRecordZero($value)
|
||||
{
|
||||
return ($value === '0' || $value === false || $value === 0 || $value === 0.0);
|
||||
}
|
||||
}
|
961
msd2/tracking/piwik/core/DataAccess/LogAggregator.php
Normal file
961
msd2/tracking/piwik/core/DataAccess/LogAggregator.php
Normal file
@ -0,0 +1,961 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Piwik\ArchiveProcessor\Parameters;
|
||||
use Piwik\Common;
|
||||
use Piwik\Container\StaticContainer;
|
||||
use Piwik\DataArray;
|
||||
use Piwik\Date;
|
||||
use Piwik\Db;
|
||||
use Piwik\Metrics;
|
||||
use Piwik\Period;
|
||||
use Piwik\Tracker\GoalManager;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
|
||||
* Contains methods that calculate metrics by aggregating log data (visits, actions, conversions,
|
||||
* ecommerce items).
|
||||
*
|
||||
* You can use the methods in this class within {@link Piwik\Plugin\Archiver Archiver} descendants
|
||||
* to aggregate log data without having to write SQL queries.
|
||||
*
|
||||
* ### Aggregation Dimension
|
||||
*
|
||||
* All aggregation methods accept a **dimension** parameter. These parameters are important as
|
||||
* they control how rows in a table are aggregated together.
|
||||
*
|
||||
* A **_dimension_** is just a table column. Rows that have the same values for these columns are
|
||||
* aggregated together. The result of these aggregations is a set of metrics for every recorded value
|
||||
* of a **dimension**.
|
||||
*
|
||||
* _Note: A dimension is essentially the same as a **GROUP BY** field._
|
||||
*
|
||||
* ### Examples
|
||||
*
|
||||
* **Aggregating visit data**
|
||||
*
|
||||
* $archiveProcessor = // ...
|
||||
* $logAggregator = $archiveProcessor->getLogAggregator();
|
||||
*
|
||||
* // get metrics for every used browser language of all visits by returning visitors
|
||||
* $query = $logAggregator->queryVisitsByDimension(
|
||||
* $dimensions = array('log_visit.location_browser_lang'),
|
||||
* $where = 'log_visit.visitor_returning = 1',
|
||||
*
|
||||
* // also count visits for each browser language that are not located in the US
|
||||
* $additionalSelects = array('sum(case when log_visit.location_country <> 'us' then 1 else 0 end) as nonus'),
|
||||
*
|
||||
* // we're only interested in visits, unique visitors & actions, so don't waste time calculating anything else
|
||||
* $metrics = array(Metrics::INDEX_NB_UNIQ_VISITORS, Metrics::INDEX_NB_VISITS, Metrics::INDEX_NB_ACTIONS),
|
||||
* );
|
||||
* if ($query === false) {
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
* while ($row = $query->fetch()) {
|
||||
* $uniqueVisitors = $row[Metrics::INDEX_NB_UNIQ_VISITORS];
|
||||
* $visits = $row[Metrics::INDEX_NB_VISITS];
|
||||
* $actions = $row[Metrics::INDEX_NB_ACTIONS];
|
||||
*
|
||||
* // ... do something w/ calculated metrics ...
|
||||
* }
|
||||
*
|
||||
* **Aggregating conversion data**
|
||||
*
|
||||
* $archiveProcessor = // ...
|
||||
* $logAggregator = $archiveProcessor->getLogAggregator();
|
||||
*
|
||||
* // get metrics for ecommerce conversions for each country
|
||||
* $query = $logAggregator->queryConversionsByDimension(
|
||||
* $dimensions = array('log_conversion.location_country'),
|
||||
* $where = 'log_conversion.idgoal = 0', // 0 is the special ecommerceOrder idGoal value in the table
|
||||
*
|
||||
* // also calculate average tax and max shipping per country
|
||||
* $additionalSelects = array(
|
||||
* 'AVG(log_conversion.revenue_tax) as avg_tax',
|
||||
* 'MAX(log_conversion.revenue_shipping) as max_shipping'
|
||||
* )
|
||||
* );
|
||||
* if ($query === false) {
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
* while ($row = $query->fetch()) {
|
||||
* $country = $row['location_country'];
|
||||
* $numEcommerceSales = $row[Metrics::INDEX_GOAL_NB_CONVERSIONS];
|
||||
* $numVisitsWithEcommerceSales = $row[Metrics::INDEX_GOAL_NB_VISITS_CONVERTED];
|
||||
* $avgTaxForCountry = $row['avg_tax'];
|
||||
* $maxShippingForCountry = $row['max_shipping'];
|
||||
*
|
||||
* // ... do something with aggregated data ...
|
||||
* }
|
||||
*/
|
||||
class LogAggregator
|
||||
{
|
||||
const LOG_VISIT_TABLE = 'log_visit';
|
||||
|
||||
const LOG_ACTIONS_TABLE = 'log_link_visit_action';
|
||||
|
||||
const LOG_CONVERSION_TABLE = "log_conversion";
|
||||
|
||||
const REVENUE_SUBTOTAL_FIELD = 'revenue_subtotal';
|
||||
|
||||
const REVENUE_TAX_FIELD = 'revenue_tax';
|
||||
|
||||
const REVENUE_SHIPPING_FIELD = 'revenue_shipping';
|
||||
|
||||
const REVENUE_DISCOUNT_FIELD = 'revenue_discount';
|
||||
|
||||
const TOTAL_REVENUE_FIELD = 'revenue';
|
||||
|
||||
const ITEMS_COUNT_FIELD = "items";
|
||||
|
||||
const CONVERSION_DATETIME_FIELD = "server_time";
|
||||
|
||||
const ACTION_DATETIME_FIELD = "server_time";
|
||||
|
||||
const VISIT_DATETIME_FIELD = 'visit_last_action_time';
|
||||
|
||||
const IDGOAL_FIELD = 'idgoal';
|
||||
|
||||
const FIELDS_SEPARATOR = ", \n\t\t\t";
|
||||
|
||||
/** @var \Piwik\Date */
|
||||
protected $dateStart;
|
||||
|
||||
/** @var \Piwik\Date */
|
||||
protected $dateEnd;
|
||||
|
||||
/** @var int[] */
|
||||
protected $sites;
|
||||
|
||||
/** @var \Piwik\Segment */
|
||||
protected $segment;
|
||||
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
private $queryOriginHint = '';
|
||||
|
||||
/**
|
||||
* @var LoggerInterface
|
||||
*/
|
||||
private $logger;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param \Piwik\ArchiveProcessor\Parameters $params
|
||||
*/
|
||||
public function __construct(Parameters $params, LoggerInterface $logger = null)
|
||||
{
|
||||
$this->dateStart = $params->getDateTimeStart();
|
||||
$this->dateEnd = $params->getDateTimeEnd();
|
||||
$this->segment = $params->getSegment();
|
||||
$this->sites = $params->getIdSites();
|
||||
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
|
||||
}
|
||||
|
||||
public function getSegment()
|
||||
{
|
||||
return $this->segment;
|
||||
}
|
||||
|
||||
public function setQueryOriginHint($nameOfOrigiin)
|
||||
{
|
||||
$this->queryOriginHint = $nameOfOrigiin;
|
||||
}
|
||||
|
||||
public function generateQuery($select, $from, $where, $groupBy, $orderBy, $limit = 0, $offset = 0)
|
||||
{
|
||||
$bind = $this->getGeneralQueryBindParams();
|
||||
$query = $this->segment->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy, $limit, $offset);
|
||||
|
||||
$select = 'SELECT';
|
||||
if ($this->queryOriginHint && is_array($query) && 0 === strpos(trim($query['sql']), $select)) {
|
||||
$query['sql'] = trim($query['sql']);
|
||||
$query['sql'] = 'SELECT /* ' . $this->queryOriginHint . ' */' . substr($query['sql'], strlen($select));
|
||||
}
|
||||
|
||||
// Log on DEBUG level all SQL archiving queries
|
||||
$this->logger->debug($query['sql']);
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
protected function getVisitsMetricFields()
|
||||
{
|
||||
return array(
|
||||
Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_VISIT_TABLE . ".idvisitor)",
|
||||
Metrics::INDEX_NB_UNIQ_FINGERPRINTS => "count(distinct " . self::LOG_VISIT_TABLE . ".config_id)",
|
||||
Metrics::INDEX_NB_VISITS => "count(*)",
|
||||
Metrics::INDEX_NB_ACTIONS => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
|
||||
Metrics::INDEX_MAX_ACTIONS => "max(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
|
||||
Metrics::INDEX_SUM_VISIT_LENGTH => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_time)",
|
||||
Metrics::INDEX_BOUNCE_COUNT => "sum(case " . self::LOG_VISIT_TABLE . ".visit_total_actions when 1 then 1 when 0 then 1 else 0 end)",
|
||||
Metrics::INDEX_NB_VISITS_CONVERTED => "sum(case " . self::LOG_VISIT_TABLE . ".visit_goal_converted when 1 then 1 else 0 end)",
|
||||
Metrics::INDEX_NB_USERS => "count(distinct " . self::LOG_VISIT_TABLE . ".user_id)",
|
||||
);
|
||||
}
|
||||
|
||||
public static function getConversionsMetricFields()
|
||||
{
|
||||
return array(
|
||||
Metrics::INDEX_GOAL_NB_CONVERSIONS => "count(*)",
|
||||
Metrics::INDEX_GOAL_NB_VISITS_CONVERTED => "count(distinct " . self::LOG_CONVERSION_TABLE . ".idvisit)",
|
||||
Metrics::INDEX_GOAL_REVENUE => self::getSqlConversionRevenueSum(self::TOTAL_REVENUE_FIELD),
|
||||
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SUBTOTAL => self::getSqlConversionRevenueSum(self::REVENUE_SUBTOTAL_FIELD),
|
||||
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_TAX => self::getSqlConversionRevenueSum(self::REVENUE_TAX_FIELD),
|
||||
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SHIPPING => self::getSqlConversionRevenueSum(self::REVENUE_SHIPPING_FIELD),
|
||||
Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_DISCOUNT => self::getSqlConversionRevenueSum(self::REVENUE_DISCOUNT_FIELD),
|
||||
Metrics::INDEX_GOAL_ECOMMERCE_ITEMS => "SUM(" . self::LOG_CONVERSION_TABLE . "." . self::ITEMS_COUNT_FIELD . ")",
|
||||
);
|
||||
}
|
||||
|
||||
private static function getSqlConversionRevenueSum($field)
|
||||
{
|
||||
return self::getSqlRevenue('SUM(' . self::LOG_CONVERSION_TABLE . '.' . $field . ')');
|
||||
}
|
||||
|
||||
public static function getSqlRevenue($field)
|
||||
{
|
||||
return "ROUND(" . $field . "," . GoalManager::REVENUE_PRECISION . ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that returns an array with common metrics for a given log_visit field distinct values.
|
||||
*
|
||||
* The statistics returned are:
|
||||
* - number of unique visitors
|
||||
* - number of visits
|
||||
* - number of actions
|
||||
* - maximum number of action for a visit
|
||||
* - sum of the visits' length in sec
|
||||
* - count of bouncing visits (visits with one page view)
|
||||
*
|
||||
* For example if $dimension = 'config_os' it will return the statistics for every distinct Operating systems
|
||||
* The returned array will have a row per distinct operating systems,
|
||||
* and a column per stat (nb of visits, max actions, etc)
|
||||
*
|
||||
* 'label' Metrics::INDEX_NB_UNIQ_VISITORS Metrics::INDEX_NB_VISITS etc.
|
||||
* Linux 27 66 ...
|
||||
* Windows XP 12 ...
|
||||
* Mac OS 15 36 ...
|
||||
*
|
||||
* @param string $dimension Table log_visit field name to be use to compute common stats
|
||||
* @return DataArray
|
||||
*/
|
||||
public function getMetricsFromVisitByDimension($dimension)
|
||||
{
|
||||
if (!is_array($dimension)) {
|
||||
$dimension = array($dimension);
|
||||
}
|
||||
if (count($dimension) == 1) {
|
||||
$dimension = array("label" => reset($dimension));
|
||||
}
|
||||
$query = $this->queryVisitsByDimension($dimension);
|
||||
$metrics = new DataArray();
|
||||
while ($row = $query->fetch()) {
|
||||
$metrics->sumMetricsVisits($row["label"], $row);
|
||||
}
|
||||
return $metrics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes and returns a query aggregating visit logs, optionally grouping by some dimension. Returns
|
||||
* a DB statement that can be used to iterate over the result
|
||||
*
|
||||
* **Result Set**
|
||||
*
|
||||
* The following columns are in each row of the result set:
|
||||
*
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}**: The total number of unique visitors in this group
|
||||
* of aggregated visits.
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits aggregated.
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_ACTIONS}**: The total number of actions performed in this group of
|
||||
* aggregated visits.
|
||||
* - **{@link Piwik\Metrics::INDEX_MAX_ACTIONS}**: The maximum actions perfomred in one visit for this group of
|
||||
* visits.
|
||||
* - **{@link Piwik\Metrics::INDEX_SUM_VISIT_LENGTH}**: The total amount of time spent on the site for this
|
||||
* group of visits.
|
||||
* - **{@link Piwik\Metrics::INDEX_BOUNCE_COUNT}**: The total number of bounced visits in this group of
|
||||
* visits.
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_VISITS_CONVERTED}**: The total number of visits for which at least one
|
||||
* conversion occurred, for this group of visits.
|
||||
*
|
||||
* Additional data can be selected by setting the `$additionalSelects` parameter.
|
||||
*
|
||||
* _Note: The metrics returned by this query can be customized by the `$metrics` parameter._
|
||||
*
|
||||
* @param array|string $dimensions `SELECT` fields (or just one field) that will be grouped by,
|
||||
* eg, `'referrer_name'` or `array('referrer_name', 'referrer_keyword')`.
|
||||
* The metrics retrieved from the query will be specific to combinations
|
||||
* of these fields. So if `array('referrer_name', 'referrer_keyword')`
|
||||
* is supplied, the query will aggregate visits for each referrer/keyword
|
||||
* combination.
|
||||
* @param bool|string $where Additional condition for the `WHERE` clause. Can be used to filter
|
||||
* the set of visits that are considered for aggregation.
|
||||
* @param array $additionalSelects Additional `SELECT` fields that are not included in the group by
|
||||
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
|
||||
* @param bool|array $metrics The set of metrics to calculate and return. If false, the query will select
|
||||
* all of them. The following values can be used:
|
||||
*
|
||||
* - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
|
||||
* - {@link Piwik\Metrics::INDEX_NB_VISITS}
|
||||
* - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
|
||||
* - {@link Piwik\Metrics::INDEX_MAX_ACTIONS}
|
||||
* - {@link Piwik\Metrics::INDEX_SUM_VISIT_LENGTH}
|
||||
* - {@link Piwik\Metrics::INDEX_BOUNCE_COUNT}
|
||||
* - {@link Piwik\Metrics::INDEX_NB_VISITS_CONVERTED}
|
||||
* @param bool|\Piwik\RankingQuery $rankingQuery
|
||||
* A pre-configured ranking query instance that will be used to limit the result.
|
||||
* If set, the return value is the array returned by {@link Piwik\RankingQuery::execute()}.
|
||||
* @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
|
||||
* {@link Piwik\RankingQuery::execute()}. Read {@link queryVisitsByDimension() this}
|
||||
* to see what aggregate data is calculated by the query.
|
||||
* @api
|
||||
*/
|
||||
public function queryVisitsByDimension(array $dimensions = array(), $where = false, array $additionalSelects = array(),
|
||||
$metrics = false, $rankingQuery = false)
|
||||
{
|
||||
$tableName = self::LOG_VISIT_TABLE;
|
||||
$availableMetrics = $this->getVisitsMetricFields();
|
||||
|
||||
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
|
||||
$from = array($tableName);
|
||||
$where = $this->getWhereStatement($tableName, self::VISIT_DATETIME_FIELD, $where);
|
||||
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
|
||||
$orderBy = false;
|
||||
|
||||
if ($rankingQuery) {
|
||||
$orderBy = '`' . Metrics::INDEX_NB_VISITS . '` DESC';
|
||||
}
|
||||
|
||||
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
|
||||
|
||||
if ($rankingQuery) {
|
||||
unset($availableMetrics[Metrics::INDEX_MAX_ACTIONS]);
|
||||
$sumColumns = array_keys($availableMetrics);
|
||||
|
||||
if ($metrics) {
|
||||
$sumColumns = array_intersect($sumColumns, $metrics);
|
||||
}
|
||||
|
||||
$rankingQuery->addColumn($sumColumns, 'sum');
|
||||
if ($this->isMetricRequested(Metrics::INDEX_MAX_ACTIONS, $metrics)) {
|
||||
$rankingQuery->addColumn(Metrics::INDEX_MAX_ACTIONS, 'max');
|
||||
}
|
||||
|
||||
return $rankingQuery->execute($query['sql'], $query['bind']);
|
||||
}
|
||||
|
||||
return $this->getDb()->query($query['sql'], $query['bind']);
|
||||
}
|
||||
|
||||
protected function getSelectsMetrics($metricsAvailable, $metricsRequested = false)
|
||||
{
|
||||
$selects = array();
|
||||
|
||||
foreach ($metricsAvailable as $metricId => $statement) {
|
||||
if ($this->isMetricRequested($metricId, $metricsRequested)) {
|
||||
$aliasAs = $this->getSelectAliasAs($metricId);
|
||||
$selects[] = $statement . $aliasAs;
|
||||
}
|
||||
}
|
||||
|
||||
return $selects;
|
||||
}
|
||||
|
||||
protected function getSelectStatement($dimensions, $tableName, $additionalSelects, array $availableMetrics, $requestedMetrics = false)
|
||||
{
|
||||
$dimensionsToSelect = $this->getDimensionsToSelect($dimensions, $additionalSelects);
|
||||
|
||||
$selects = array_merge(
|
||||
$this->getSelectDimensions($dimensionsToSelect, $tableName),
|
||||
$this->getSelectsMetrics($availableMetrics, $requestedMetrics),
|
||||
!empty($additionalSelects) ? $additionalSelects : array()
|
||||
);
|
||||
|
||||
$select = implode(self::FIELDS_SEPARATOR, $selects);
|
||||
return $select;
|
||||
}
|
||||
|
||||
/**
|
||||
* Will return the subset of $dimensions that are not found in $additionalSelects
|
||||
*
|
||||
* @param $dimensions
|
||||
* @param array $additionalSelects
|
||||
* @return array
|
||||
*/
|
||||
protected function getDimensionsToSelect($dimensions, $additionalSelects)
|
||||
{
|
||||
if (empty($additionalSelects)) {
|
||||
return $dimensions;
|
||||
}
|
||||
|
||||
$dimensionsToSelect = array();
|
||||
foreach ($dimensions as $selectAs => $dimension) {
|
||||
$asAlias = $this->getSelectAliasAs($dimension);
|
||||
foreach ($additionalSelects as $additionalSelect) {
|
||||
if (strpos($additionalSelect, $asAlias) === false) {
|
||||
$dimensionsToSelect[$selectAs] = $dimension;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$dimensionsToSelect = array_unique($dimensionsToSelect);
|
||||
return $dimensionsToSelect;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the dimensions array, where
|
||||
* (1) the table name is prepended to the field
|
||||
* (2) the "AS `label` " is appended to the field
|
||||
*
|
||||
* @param $dimensions
|
||||
* @param $tableName
|
||||
* @param bool $appendSelectAs
|
||||
* @param bool $parseSelectAs
|
||||
* @return mixed
|
||||
*/
|
||||
protected function getSelectDimensions($dimensions, $tableName, $appendSelectAs = true)
|
||||
{
|
||||
foreach ($dimensions as $selectAs => &$field) {
|
||||
$selectAsString = $field;
|
||||
|
||||
if (!is_numeric($selectAs)) {
|
||||
$selectAsString = $selectAs;
|
||||
} else if ($this->isFieldFunctionOrComplexExpression($field)) {
|
||||
// if complex expression has a select as, use it
|
||||
if (!$appendSelectAs && preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) {
|
||||
$field = $matches[1];
|
||||
continue;
|
||||
}
|
||||
|
||||
// if function w/o select as, do not alias or prefix
|
||||
$selectAsString = $appendSelectAs = false;
|
||||
}
|
||||
|
||||
$isKnownField = !in_array($field, array('referrer_data'));
|
||||
|
||||
if ($selectAsString == $field && $isKnownField) {
|
||||
$field = $this->prefixColumn($field, $tableName);
|
||||
}
|
||||
|
||||
if ($appendSelectAs && $selectAsString) {
|
||||
$field = $this->prefixColumn($field, $tableName) . $this->getSelectAliasAs($selectAsString);
|
||||
}
|
||||
}
|
||||
|
||||
return $dimensions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prefixes a column name with a table name if not already done.
|
||||
*
|
||||
* @param string $column eg, 'location_provider'
|
||||
* @param string $tableName eg, 'log_visit'
|
||||
* @return string eg, 'log_visit.location_provider'
|
||||
*/
|
||||
private function prefixColumn($column, $tableName)
|
||||
{
|
||||
if (strpos($column, '.') === false) {
|
||||
return $tableName . '.' . $column;
|
||||
} else {
|
||||
return $column;
|
||||
}
|
||||
}
|
||||
|
||||
protected function isFieldFunctionOrComplexExpression($field)
|
||||
{
|
||||
return strpos($field, "(") !== false
|
||||
|| strpos($field, "CASE") !== false;
|
||||
}
|
||||
|
||||
protected function getSelectAliasAs($metricId)
|
||||
{
|
||||
return " AS `" . $metricId . "`";
|
||||
}
|
||||
|
||||
protected function isMetricRequested($metricId, $metricsRequested)
|
||||
{
|
||||
// do not process INDEX_NB_UNIQ_FINGERPRINTS unless specifically asked for
|
||||
if ($metricsRequested === false) {
|
||||
if ($metricId == Metrics::INDEX_NB_UNIQ_FINGERPRINTS) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return in_array($metricId, $metricsRequested);
|
||||
}
|
||||
|
||||
public function getWhereStatement($tableName, $datetimeField, $extraWhere = false)
|
||||
{
|
||||
$where = "$tableName.$datetimeField >= ?
|
||||
AND $tableName.$datetimeField <= ?
|
||||
AND $tableName.idsite IN (". Common::getSqlStringFieldsArray($this->sites) . ")";
|
||||
|
||||
if (!empty($extraWhere)) {
|
||||
$extraWhere = sprintf($extraWhere, $tableName, $tableName);
|
||||
$where .= ' AND ' . $extraWhere;
|
||||
}
|
||||
|
||||
return $where;
|
||||
}
|
||||
|
||||
protected function getGroupByStatement($dimensions, $tableName)
|
||||
{
|
||||
$dimensions = $this->getSelectDimensions($dimensions, $tableName, $appendSelectAs = false);
|
||||
$groupBy = implode(", ", $dimensions);
|
||||
|
||||
return $groupBy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns general bind parameters for all log aggregation queries. This includes the datetime
|
||||
* start of entities, datetime end of entities and IDs of all sites.
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function getGeneralQueryBindParams()
|
||||
{
|
||||
$bind = array($this->dateStart->toString(Date::DATE_TIME_FORMAT), $this->dateEnd->toString(Date::DATE_TIME_FORMAT));
|
||||
$bind = array_merge($bind, $this->sites);
|
||||
|
||||
return $bind;
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes and returns a query aggregating ecommerce item data (everything stored in the
|
||||
* **log\_conversion\_item** table) and returns a DB statement that can be used to iterate over the result
|
||||
*
|
||||
* <a name="queryEcommerceItems-result-set"></a>
|
||||
* **Result Set**
|
||||
*
|
||||
* Each row of the result set represents an aggregated group of ecommerce items. The following
|
||||
* columns are in each row of the result set:
|
||||
*
|
||||
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_REVENUE}**: The total revenue for the group of items.
|
||||
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY}**: The total number of items in this group.
|
||||
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_PRICE}**: The total price for the group of items.
|
||||
* - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ORDERS}**: The total number of orders this group of items
|
||||
* belongs to. This will be <= to the total number
|
||||
* of items in this group.
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits that caused these items to be logged.
|
||||
* - **ecommerceType**: Either {@link Piwik\Tracker\GoalManager::IDGOAL_CART} if the items in this group were
|
||||
* abandoned by a visitor, or {@link Piwik\Tracker\GoalManager::IDGOAL_ORDER} if they
|
||||
* were ordered by a visitor.
|
||||
*
|
||||
* **Limitations**
|
||||
*
|
||||
* Segmentation is not yet supported for this aggregation method.
|
||||
*
|
||||
* @param string $dimension One or more **log\_conversion\_item** columns to group aggregated data by.
|
||||
* Eg, `'idaction_sku'` or `'idaction_sku, idaction_category'`.
|
||||
* @return \Zend_Db_Statement A statement object that can be used to iterate through the query's
|
||||
* result set. See [above](#queryEcommerceItems-result-set) to learn more
|
||||
* about what this query selects.
|
||||
* @api
|
||||
*/
|
||||
public function queryEcommerceItems($dimension)
|
||||
{
|
||||
$query = $this->generateQuery(
|
||||
// SELECT ...
|
||||
implode(
|
||||
', ',
|
||||
array(
|
||||
"log_action.name AS label",
|
||||
sprintf("log_conversion_item.%s AS labelIdAction", $dimension),
|
||||
sprintf(
|
||||
'%s AS `%d`',
|
||||
self::getSqlRevenue('SUM(log_conversion_item.quantity * log_conversion_item.price)'),
|
||||
Metrics::INDEX_ECOMMERCE_ITEM_REVENUE
|
||||
),
|
||||
sprintf(
|
||||
'%s AS `%d`',
|
||||
self::getSqlRevenue('SUM(log_conversion_item.quantity)'),
|
||||
Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY
|
||||
),
|
||||
sprintf(
|
||||
'%s AS `%d`',
|
||||
self::getSqlRevenue('SUM(log_conversion_item.price)'),
|
||||
Metrics::INDEX_ECOMMERCE_ITEM_PRICE
|
||||
),
|
||||
sprintf(
|
||||
'COUNT(distinct log_conversion_item.idorder) AS `%d`',
|
||||
Metrics::INDEX_ECOMMERCE_ORDERS
|
||||
),
|
||||
sprintf(
|
||||
'COUNT(distinct log_conversion_item.idvisit) AS `%d`',
|
||||
Metrics::INDEX_NB_VISITS
|
||||
),
|
||||
sprintf(
|
||||
'CASE log_conversion_item.idorder WHEN \'0\' THEN %d ELSE %d END AS ecommerceType',
|
||||
GoalManager::IDGOAL_CART,
|
||||
GoalManager::IDGOAL_ORDER
|
||||
)
|
||||
)
|
||||
),
|
||||
|
||||
// FROM ...
|
||||
array(
|
||||
"log_conversion_item",
|
||||
array(
|
||||
"table" => "log_action",
|
||||
"joinOn" => sprintf("log_conversion_item.%s = log_action.idaction", $dimension)
|
||||
)
|
||||
),
|
||||
|
||||
// WHERE ... AND ...
|
||||
implode(
|
||||
' AND ',
|
||||
array(
|
||||
'log_conversion_item.server_time >= ?',
|
||||
'log_conversion_item.server_time <= ?',
|
||||
'log_conversion_item.idsite IN (' . Common::getSqlStringFieldsArray($this->sites) . ')',
|
||||
'log_conversion_item.deleted = 0'
|
||||
)
|
||||
),
|
||||
|
||||
// GROUP BY ...
|
||||
sprintf(
|
||||
"ecommerceType, log_conversion_item.%s",
|
||||
$dimension
|
||||
),
|
||||
|
||||
// ORDER ...
|
||||
false
|
||||
);
|
||||
|
||||
return $this->getDb()->query($query['sql'], $query['bind']);
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes and returns a query aggregating action data (everything in the log_action table) and returns
|
||||
* a DB statement that can be used to iterate over the result
|
||||
*
|
||||
* <a name="queryActionsByDimension-result-set"></a>
|
||||
* **Result Set**
|
||||
*
|
||||
* Each row of the result set represents an aggregated group of actions. The following columns
|
||||
* are in each aggregate row:
|
||||
*
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}**: The total number of unique visitors that performed
|
||||
* the actions in this group.
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits these actions belong to.
|
||||
* - **{@link Piwik\Metrics::INDEX_NB_ACTIONS}**: The total number of actions in this aggregate group.
|
||||
*
|
||||
* Additional data can be selected through the `$additionalSelects` parameter.
|
||||
*
|
||||
* _Note: The metrics calculated by this query can be customized by the `$metrics` parameter._
|
||||
*
|
||||
* @param array|string $dimensions One or more SELECT fields that will be used to group the log_action
|
||||
* rows by. This parameter determines which log_action rows will be
|
||||
* aggregated together.
|
||||
* @param bool|string $where Additional condition for the WHERE clause. Can be used to filter
|
||||
* the set of visits that are considered for aggregation.
|
||||
* @param array $additionalSelects Additional SELECT fields that are not included in the group by
|
||||
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
|
||||
* @param bool|array $metrics The set of metrics to calculate and return. If `false`, the query will select
|
||||
* all of them. The following values can be used:
|
||||
*
|
||||
* - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
|
||||
* - {@link Piwik\Metrics::INDEX_NB_VISITS}
|
||||
* - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
|
||||
* @param bool|\Piwik\RankingQuery $rankingQuery
|
||||
* A pre-configured ranking query instance that will be used to limit the result.
|
||||
* If set, the return value is the array returned by {@link Piwik\RankingQuery::execute()}.
|
||||
* @param bool|string $joinLogActionOnColumn One or more columns from the **log_link_visit_action** table that
|
||||
* log_action should be joined on. The table alias used for each join
|
||||
* is `"log_action$i"` where `$i` is the index of the column in this
|
||||
* array.
|
||||
*
|
||||
* If a string is used for this parameter, the table alias is not
|
||||
* suffixed (since there is only one column).
|
||||
* @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
|
||||
* {@link Piwik\RankingQuery::execute()}. Read [this](#queryEcommerceItems-result-set)
|
||||
* to see what aggregate data is calculated by the query.
|
||||
* @api
|
||||
*/
|
||||
public function queryActionsByDimension($dimensions, $where = '', $additionalSelects = array(), $metrics = false, $rankingQuery = null, $joinLogActionOnColumn = false)
|
||||
{
|
||||
$tableName = self::LOG_ACTIONS_TABLE;
|
||||
$availableMetrics = $this->getActionsMetricFields();
|
||||
|
||||
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
|
||||
$from = array($tableName);
|
||||
$where = $this->getWhereStatement($tableName, self::ACTION_DATETIME_FIELD, $where);
|
||||
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
|
||||
$orderBy = false;
|
||||
|
||||
if ($joinLogActionOnColumn !== false) {
|
||||
$multiJoin = is_array($joinLogActionOnColumn);
|
||||
if (!$multiJoin) {
|
||||
$joinLogActionOnColumn = array($joinLogActionOnColumn);
|
||||
}
|
||||
|
||||
foreach ($joinLogActionOnColumn as $i => $joinColumn) {
|
||||
$tableAlias = 'log_action' . ($multiJoin ? $i + 1 : '');
|
||||
|
||||
if (strpos($joinColumn, ' ') === false) {
|
||||
$joinOn = $tableAlias . '.idaction = ' . $tableName . '.' . $joinColumn;
|
||||
} else {
|
||||
// more complex join column like if (...)
|
||||
$joinOn = $tableAlias . '.idaction = ' . $joinColumn;
|
||||
}
|
||||
|
||||
$from[] = array(
|
||||
'table' => 'log_action',
|
||||
'tableAlias' => $tableAlias,
|
||||
'joinOn' => $joinOn
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if ($rankingQuery) {
|
||||
$orderBy = '`' . Metrics::INDEX_NB_ACTIONS . '` DESC';
|
||||
}
|
||||
|
||||
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
|
||||
|
||||
if ($rankingQuery !== null) {
|
||||
$sumColumns = array_keys($availableMetrics);
|
||||
if ($metrics) {
|
||||
$sumColumns = array_intersect($sumColumns, $metrics);
|
||||
}
|
||||
|
||||
$rankingQuery->addColumn($sumColumns, 'sum');
|
||||
|
||||
return $rankingQuery->execute($query['sql'], $query['bind']);
|
||||
}
|
||||
|
||||
return $this->getDb()->query($query['sql'], $query['bind']);
|
||||
}
|
||||
|
||||
protected function getActionsMetricFields()
|
||||
{
|
||||
return array(
|
||||
Metrics::INDEX_NB_VISITS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisit)",
|
||||
Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisitor)",
|
||||
Metrics::INDEX_NB_ACTIONS => "count(*)",
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a query aggregating conversion data (everything in the **log_conversion** table) and returns
|
||||
* a DB statement that can be used to iterate over the result.
|
||||
*
|
||||
* <a name="queryConversionsByDimension-result-set"></a>
|
||||
* **Result Set**
|
||||
*
|
||||
* Each row of the result set represents an aggregated group of conversions. The
|
||||
* following columns are in each aggregate row:
|
||||
*
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_NB_CONVERSIONS}**: The total number of conversions in this aggregate
|
||||
* group.
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_NB_VISITS_CONVERTED}**: The total number of visits during which these
|
||||
* conversions were converted.
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_REVENUE}**: The total revenue generated by these conversions. This value
|
||||
* includes the revenue from individual ecommerce items.
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SUBTOTAL}**: The total cost of all ecommerce items sold
|
||||
* within these conversions. This value does not
|
||||
* include tax, shipping or any applied discount.
|
||||
*
|
||||
* _This metric is only applicable to the special
|
||||
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_TAX}**: The total tax applied to every transaction in these
|
||||
* conversions.
|
||||
*
|
||||
* _This metric is only applicable to the special
|
||||
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SHIPPING}**: The total shipping cost for every transaction
|
||||
* in these conversions.
|
||||
*
|
||||
* _This metric is only applicable to the special
|
||||
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_DISCOUNT}**: The total discount applied to every transaction
|
||||
* in these conversions.
|
||||
*
|
||||
* _This metric is only applicable to the special
|
||||
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
|
||||
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_ITEMS}**: The total number of ecommerce items sold in each transaction
|
||||
* in these conversions.
|
||||
*
|
||||
* _This metric is only applicable to the special
|
||||
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
|
||||
*
|
||||
* Additional data can be selected through the `$additionalSelects` parameter.
|
||||
*
|
||||
* _Note: This method will only query the **log_conversion** table. Other tables cannot be joined
|
||||
* using this method._
|
||||
*
|
||||
* @param array|string $dimensions One or more **SELECT** fields that will be used to group the log_conversion
|
||||
* rows by. This parameter determines which **log_conversion** rows will be
|
||||
* aggregated together.
|
||||
* @param bool|string $where An optional SQL expression used in the SQL's **WHERE** clause.
|
||||
* @param array $additionalSelects Additional SELECT fields that are not included in the group by
|
||||
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
|
||||
* @return \Zend_Db_Statement
|
||||
*/
|
||||
public function queryConversionsByDimension($dimensions = array(), $where = false, $additionalSelects = array(), $extraFrom = [])
|
||||
{
|
||||
$dimensions = array_merge(array(self::IDGOAL_FIELD), $dimensions);
|
||||
$tableName = self::LOG_CONVERSION_TABLE;
|
||||
$availableMetrics = $this->getConversionsMetricFields();
|
||||
|
||||
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics);
|
||||
|
||||
$from = array_merge([$tableName], $extraFrom);
|
||||
$where = $this->getWhereStatement($tableName, self::CONVERSION_DATETIME_FIELD, $where);
|
||||
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
|
||||
$orderBy = false;
|
||||
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
|
||||
|
||||
return $this->getDb()->query($query['sql'], $query['bind']);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and returns an array of SQL `SELECT` expressions that will each count how
|
||||
* many rows have a column whose value is within a certain range.
|
||||
*
|
||||
* **Note:** The result of this function is meant for use in the `$additionalSelects` parameter
|
||||
* in one of the query... methods (for example {@link queryVisitsByDimension()}).
|
||||
*
|
||||
* **Example**
|
||||
*
|
||||
* // summarize one column
|
||||
* $visitTotalActionsRanges = array(
|
||||
* array(1, 1),
|
||||
* array(2, 10),
|
||||
* array(10)
|
||||
* );
|
||||
* $selects = LogAggregator::getSelectsFromRangedColumn('visit_total_actions', $visitTotalActionsRanges, 'log_visit', 'vta');
|
||||
*
|
||||
* // summarize another column in the same request
|
||||
* $visitCountVisitsRanges = array(
|
||||
* array(1, 1),
|
||||
* array(2, 20),
|
||||
* array(20)
|
||||
* );
|
||||
* $selects = array_merge(
|
||||
* $selects,
|
||||
* LogAggregator::getSelectsFromRangedColumn('visitor_count_visits', $visitCountVisitsRanges, 'log_visit', 'vcv')
|
||||
* );
|
||||
*
|
||||
* // perform the query
|
||||
* $logAggregator = // get the LogAggregator somehow
|
||||
* $query = $logAggregator->queryVisitsByDimension($dimensions = array(), $where = false, $selects);
|
||||
* $tableSummary = $query->fetch();
|
||||
*
|
||||
* $numberOfVisitsWithOneAction = $tableSummary['vta0'];
|
||||
* $numberOfVisitsBetweenTwoAnd10 = $tableSummary['vta1'];
|
||||
*
|
||||
* $numberOfVisitsWithVisitCountOfOne = $tableSummary['vcv0'];
|
||||
*
|
||||
* @param string $column The name of a column in `$table` that will be summarized.
|
||||
* @param array $ranges The array of ranges over which the data in the table
|
||||
* will be summarized. For example,
|
||||
* ```
|
||||
* array(
|
||||
* array(1, 1),
|
||||
* array(2, 2),
|
||||
* array(3, 8),
|
||||
* array(8) // everything over 8
|
||||
* )
|
||||
* ```
|
||||
* @param string $table The unprefixed name of the table whose rows will be summarized.
|
||||
* @param string $selectColumnPrefix The prefix to prepend to each SELECT expression. This
|
||||
* prefix is used to differentiate different sets of
|
||||
* range summarization SELECTs. You can supply different
|
||||
* values to this argument to summarize several columns
|
||||
* in one query (see above for an example).
|
||||
* @param bool $restrictToReturningVisitors Whether to only summarize rows that belong to
|
||||
* visits of returning visitors or not. If this
|
||||
* argument is true, then the SELECT expressions
|
||||
* returned can only be used with the
|
||||
* {@link queryVisitsByDimension()} method.
|
||||
* @return array An array of SQL SELECT expressions, for example,
|
||||
* ```
|
||||
* array(
|
||||
* 'sum(case when log_visit.visit_total_actions between 0 and 2 then 1 else 0 end) as vta0',
|
||||
* 'sum(case when log_visit.visit_total_actions > 2 then 1 else 0 end) as vta1'
|
||||
* )
|
||||
* ```
|
||||
* @api
|
||||
*/
|
||||
public static function getSelectsFromRangedColumn($column, $ranges, $table, $selectColumnPrefix, $restrictToReturningVisitors = false)
|
||||
{
|
||||
$selects = array();
|
||||
$extraCondition = '';
|
||||
|
||||
if ($restrictToReturningVisitors) {
|
||||
// extra condition for the SQL SELECT that makes sure only returning visits are counted
|
||||
// when creating the 'days since last visit' report
|
||||
$extraCondition = 'and log_visit.visitor_returning = 1';
|
||||
$extraSelect = "sum(case when log_visit.visitor_returning = 0 then 1 else 0 end) "
|
||||
. " as `" . $selectColumnPrefix . 'General_NewVisits' . "`";
|
||||
$selects[] = $extraSelect;
|
||||
}
|
||||
|
||||
foreach ($ranges as $gap) {
|
||||
if (count($gap) == 2) {
|
||||
$lowerBound = $gap[0];
|
||||
$upperBound = $gap[1];
|
||||
|
||||
$selectAs = "$selectColumnPrefix$lowerBound-$upperBound";
|
||||
|
||||
$selects[] = "sum(case when $table.$column between $lowerBound and $upperBound $extraCondition" .
|
||||
" then 1 else 0 end) as `$selectAs`";
|
||||
} else {
|
||||
$lowerBound = $gap[0];
|
||||
|
||||
$selectAs = $selectColumnPrefix . ($lowerBound + 1) . urlencode('+');
|
||||
$selects[] = "sum(case when $table.$column > $lowerBound $extraCondition then 1 else 0 end) as `$selectAs`";
|
||||
}
|
||||
}
|
||||
|
||||
return $selects;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up the row data and return values.
|
||||
* $lookForThisPrefix can be used to make sure only SOME of the data in $row is used.
|
||||
*
|
||||
* The array will have one column $columnName
|
||||
*
|
||||
* @param $row
|
||||
* @param $columnName
|
||||
* @param bool $lookForThisPrefix A string that identifies which elements of $row to use
|
||||
* in the result. Every key of $row that starts with this
|
||||
* value is used.
|
||||
* @return array
|
||||
*/
|
||||
public static function makeArrayOneColumn($row, $columnName, $lookForThisPrefix = false)
|
||||
{
|
||||
$cleanRow = array();
|
||||
|
||||
foreach ($row as $label => $count) {
|
||||
if (empty($lookForThisPrefix)
|
||||
|| strpos($label, $lookForThisPrefix) === 0
|
||||
) {
|
||||
$cleanLabel = substr($label, strlen($lookForThisPrefix));
|
||||
$cleanRow[$cleanLabel] = array($columnName => $count);
|
||||
}
|
||||
}
|
||||
|
||||
return $cleanRow;
|
||||
}
|
||||
|
||||
public function getDb()
|
||||
{
|
||||
return Db::get();
|
||||
}
|
||||
}
|
301
msd2/tracking/piwik/core/DataAccess/LogQueryBuilder.php
Normal file
301
msd2/tracking/piwik/core/DataAccess/LogQueryBuilder.php
Normal file
@ -0,0 +1,301 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Exception;
|
||||
use Piwik\DataAccess\LogQueryBuilder\JoinGenerator;
|
||||
use Piwik\DataAccess\LogQueryBuilder\JoinTables;
|
||||
use Piwik\Plugin\LogTablesProvider;
|
||||
use Piwik\Segment\SegmentExpression;
|
||||
|
||||
class LogQueryBuilder
|
||||
{
|
||||
/**
|
||||
* @var LogTablesProvider
|
||||
*/
|
||||
private $logTableProvider;
|
||||
|
||||
/**
|
||||
* Forces to use a subselect when generating the query. Set value to `false` to force not using a subselect.
|
||||
* @var string
|
||||
*/
|
||||
private $forcedInnerGroupBy = '';
|
||||
|
||||
public function __construct(LogTablesProvider $logTablesProvider)
|
||||
{
|
||||
$this->logTableProvider = $logTablesProvider;
|
||||
}
|
||||
|
||||
/**
|
||||
* Forces to use a subselect when generating the query.
|
||||
* @var string
|
||||
*/
|
||||
public function forceInnerGroupBySubselect($innerGroupBy)
|
||||
{
|
||||
$this->forcedInnerGroupBy = $innerGroupBy;
|
||||
}
|
||||
|
||||
public function getSelectQueryString(SegmentExpression $segmentExpression, $select, $from, $where, $bind, $groupBy,
|
||||
$orderBy, $limitAndOffset)
|
||||
{
|
||||
if (!is_array($from)) {
|
||||
$from = array($from);
|
||||
}
|
||||
|
||||
$fromInitially = $from;
|
||||
|
||||
if (!$segmentExpression->isEmpty()) {
|
||||
$segmentExpression->parseSubExpressionsIntoSqlExpressions($from);
|
||||
$segmentSql = $segmentExpression->getSql();
|
||||
$where = $this->getWhereMatchBoth($where, $segmentSql['where']);
|
||||
$bind = array_merge($bind, $segmentSql['bind']);
|
||||
}
|
||||
|
||||
$tables = new JoinTables($this->logTableProvider, $from);
|
||||
$join = new JoinGenerator($tables);
|
||||
$join->generate();
|
||||
$from = $join->getJoinString();
|
||||
$joinWithSubSelect = $join->shouldJoinWithSelect();
|
||||
|
||||
// hack for https://github.com/piwik/piwik/issues/9194#issuecomment-164321612
|
||||
$useSpecialConversionGroupBy = (!empty($segmentSql)
|
||||
&& strpos($groupBy, 'log_conversion.idgoal') !== false
|
||||
&& $fromInitially == array('log_conversion')
|
||||
&& strpos($from, 'log_link_visit_action') !== false);
|
||||
|
||||
if (!empty($this->forcedInnerGroupBy)) {
|
||||
$sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, $tables, $this->forcedInnerGroupBy);
|
||||
} elseif ($useSpecialConversionGroupBy) {
|
||||
$innerGroupBy = "CONCAT(log_conversion.idvisit, '_' , log_conversion.idgoal, '_', log_conversion.buster)";
|
||||
$sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, $tables, $innerGroupBy);
|
||||
} elseif ($joinWithSubSelect) {
|
||||
$sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, $tables);
|
||||
} else {
|
||||
$sql = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset);
|
||||
}
|
||||
return array(
|
||||
'sql' => $sql,
|
||||
'bind' => $bind
|
||||
);
|
||||
}
|
||||
|
||||
private function getKnownTables()
|
||||
{
|
||||
$names = array();
|
||||
foreach ($this->logTableProvider->getAllLogTables() as $logTable) {
|
||||
$names[] = $logTable->getName();
|
||||
}
|
||||
return $names;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a select query where actions have to be joined on visits (or conversions)
|
||||
* In this case, the query gets wrapped in another query so that grouping by visit is possible
|
||||
* @param string $select
|
||||
* @param string $from
|
||||
* @param string $where
|
||||
* @param string $groupBy
|
||||
* @param string $orderBy
|
||||
* @param string $limitAndOffset
|
||||
* @param null|string $innerGroupBy If given, this inner group by will be used. If not, we try to detect one
|
||||
* @throws Exception
|
||||
* @return string
|
||||
*/
|
||||
private function buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset, JoinTables $tables, $innerGroupBy = null)
|
||||
{
|
||||
$matchTables = $this->getKnownTables();
|
||||
foreach ($tables as $table) {
|
||||
if (is_array($table) && isset($table['tableAlias']) && !in_array($table['tableAlias'], $matchTables, $strict = true)) {
|
||||
$matchTables[] = $table['tableAlias'];
|
||||
} elseif (is_array($table) && isset($table['table']) && !in_array($table['table'], $matchTables, $strict = true)) {
|
||||
$matchTables[] = $table['table'];
|
||||
} elseif (is_string($table) && !in_array($table, $matchTables, $strict = true)) {
|
||||
$matchTables[] = $table;
|
||||
}
|
||||
}
|
||||
|
||||
$matchTables = '(' . implode('|', $matchTables) . ')';
|
||||
preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches);
|
||||
$neededFields = array_unique($matches[0]);
|
||||
|
||||
if (count($neededFields) == 0) {
|
||||
throw new Exception("No needed fields found in select expression. "
|
||||
. "Please use a table prefix.");
|
||||
}
|
||||
|
||||
$fieldNames = array();
|
||||
$toBeReplaced = array();
|
||||
$epregReplace = array();
|
||||
foreach ($neededFields as &$neededField) {
|
||||
$parts = explode('.', $neededField);
|
||||
if (count($parts) === 2 && !empty($parts[1])) {
|
||||
if (in_array($parts[1], $fieldNames, $strict = true)) {
|
||||
// eg when selecting 2 dimensions log_action_X.name
|
||||
$columnAs = $parts[1] . md5($neededField);
|
||||
$fieldNames[] = $columnAs;
|
||||
// we make sure to not replace a idvisitor column when duplicate column is idvisit
|
||||
$toBeReplaced[$neededField . ' '] = $parts[0] . '.' . $columnAs . ' ';
|
||||
$toBeReplaced[$neededField . ')'] = $parts[0] . '.' . $columnAs . ')';
|
||||
$toBeReplaced[$neededField . '`'] = $parts[0] . '.' . $columnAs . '`';
|
||||
$toBeReplaced[$neededField . ','] = $parts[0] . '.' . $columnAs . ',';
|
||||
// replace when string ends this, we need to use regex to check for this
|
||||
$epregReplace["/(" . $neededField . ")$/"] = $parts[0] . '.' . $columnAs;
|
||||
$neededField .= ' as ' . $columnAs;
|
||||
} else {
|
||||
$fieldNames[] = $parts[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
preg_match_all("/". $matchTables . "/", $from, $matchesFrom);
|
||||
|
||||
$innerSelect = implode(", \n", $neededFields);
|
||||
$innerFrom = $from;
|
||||
$innerWhere = $where;
|
||||
|
||||
$innerLimitAndOffset = $limitAndOffset;
|
||||
|
||||
$innerOrderBy = "NULL";
|
||||
if ($innerLimitAndOffset && $orderBy) {
|
||||
// only When LIMITing we can apply to the inner query the same ORDER BY as the parent query
|
||||
$innerOrderBy = $orderBy;
|
||||
}
|
||||
if ($innerLimitAndOffset) {
|
||||
// When LIMITing, no need to GROUP BY (GROUPing by is done before the LIMIT which is super slow when large amount of rows is matched)
|
||||
$innerGroupBy = false;
|
||||
}
|
||||
|
||||
if (!isset($innerGroupBy) && in_array('log_visit', $matchesFrom[1])) {
|
||||
$innerGroupBy = "log_visit.idvisit";
|
||||
} elseif (!isset($innerGroupBy)) {
|
||||
throw new Exception('Cannot use subselect for join as no group by rule is specified');
|
||||
}
|
||||
|
||||
if (!empty($toBeReplaced)) {
|
||||
$select = preg_replace(array_keys($epregReplace), array_values($epregReplace), $select);
|
||||
$select = str_replace(array_keys($toBeReplaced), array_values($toBeReplaced), $select);
|
||||
if (!empty($groupBy)) {
|
||||
$groupBy = preg_replace(array_keys($epregReplace), array_values($epregReplace), $groupBy);
|
||||
$groupBy = str_replace(array_keys($toBeReplaced), array_values($toBeReplaced), $groupBy);
|
||||
}
|
||||
if (!empty($orderBy)) {
|
||||
$orderBy = preg_replace(array_keys($epregReplace), array_values($epregReplace), $orderBy);
|
||||
$orderBy = str_replace(array_keys($toBeReplaced), array_values($toBeReplaced), $orderBy);
|
||||
}
|
||||
}
|
||||
|
||||
$innerQuery = $this->buildSelectQuery($innerSelect, $innerFrom, $innerWhere, $innerGroupBy, $innerOrderBy, $innerLimitAndOffset);
|
||||
|
||||
$select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select);
|
||||
|
||||
$from = "
|
||||
(
|
||||
$innerQuery
|
||||
) AS log_inner";
|
||||
$where = false;
|
||||
$orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy);
|
||||
$groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy);
|
||||
|
||||
$outerLimitAndOffset = null;
|
||||
$query = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $outerLimitAndOffset);
|
||||
return $query;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Build select query the normal way
|
||||
*
|
||||
* @param string $select fieldlist to be selected
|
||||
* @param string $from tablelist to select from
|
||||
* @param string $where where clause
|
||||
* @param string $groupBy group by clause
|
||||
* @param string $orderBy order by clause
|
||||
* @param string|int $limitAndOffset limit by clause eg '5' for Limit 5 Offset 0 or '10, 5' for Limit 5 Offset 10
|
||||
* @return string
|
||||
*/
|
||||
private function buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limitAndOffset)
|
||||
{
|
||||
$sql = "
|
||||
SELECT
|
||||
$select
|
||||
FROM
|
||||
$from";
|
||||
|
||||
if ($where) {
|
||||
$sql .= "
|
||||
WHERE
|
||||
$where";
|
||||
}
|
||||
|
||||
if ($groupBy) {
|
||||
$sql .= "
|
||||
GROUP BY
|
||||
$groupBy";
|
||||
}
|
||||
|
||||
if ($orderBy) {
|
||||
$sql .= "
|
||||
ORDER BY
|
||||
$orderBy";
|
||||
}
|
||||
|
||||
$sql = $this->appendLimitClauseToQuery($sql, $limitAndOffset);
|
||||
|
||||
return $sql;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param $sql
|
||||
* @param $limit LIMIT clause eg. "10, 50" (offset 10, limit 50)
|
||||
* @return string
|
||||
*/
|
||||
private function appendLimitClauseToQuery($sql, $limit)
|
||||
{
|
||||
$limitParts = explode(',', (string) $limit);
|
||||
$isLimitWithOffset = 2 === count($limitParts);
|
||||
|
||||
if ($isLimitWithOffset) {
|
||||
// $limit = "10, 5". We would not have to do this but we do to prevent possible injections.
|
||||
$offset = trim($limitParts[0]);
|
||||
$limit = trim($limitParts[1]);
|
||||
$sql .= sprintf(' LIMIT %d, %d', $offset, $limit);
|
||||
} else {
|
||||
// $limit = "5"
|
||||
$limit = (int)$limit;
|
||||
if ($limit >= 1) {
|
||||
$sql .= " LIMIT $limit";
|
||||
}
|
||||
}
|
||||
|
||||
return $sql;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param $where
|
||||
* @param $segmentWhere
|
||||
* @return string
|
||||
* @throws
|
||||
*/
|
||||
protected function getWhereMatchBoth($where, $segmentWhere)
|
||||
{
|
||||
if (empty($segmentWhere) && empty($where)) {
|
||||
throw new \Exception("Segment where clause should be non empty.");
|
||||
}
|
||||
if (empty($segmentWhere)) {
|
||||
return $where;
|
||||
}
|
||||
if (empty($where)) {
|
||||
return $segmentWhere;
|
||||
}
|
||||
return "( $where )
|
||||
AND
|
||||
($segmentWhere)";
|
||||
}
|
||||
}
|
@ -0,0 +1,316 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Piwik\DataAccess\LogQueryBuilder;
|
||||
|
||||
use Exception;
|
||||
use Piwik\Common;
|
||||
use Piwik\Tracker\LogTable;
|
||||
|
||||
class JoinGenerator
|
||||
{
|
||||
/**
|
||||
* @var JoinTables
|
||||
*/
|
||||
protected $tables;
|
||||
|
||||
/**
|
||||
* @var bool
|
||||
*/
|
||||
private $joinWithSubSelect = false;
|
||||
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
private $joinString = '';
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $nonVisitJoins = array();
|
||||
|
||||
public function __construct(JoinTables $tables)
|
||||
{
|
||||
$this->tables = $tables;
|
||||
$this->addMissingTablesNeededForJoins();
|
||||
}
|
||||
|
||||
private function addMissingTablesNeededForJoins()
|
||||
{
|
||||
foreach ($this->tables as $index => $table) {
|
||||
if (is_array($table)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$logTable = $this->tables->getLogTable($table);
|
||||
|
||||
if (!$logTable->getColumnToJoinOnIdVisit()) {
|
||||
$tableNameToJoin = $logTable->getLinkTableToBeAbleToJoinOnVisit();
|
||||
|
||||
if (empty($tableNameToJoin) && $logTable->getWaysToJoinToOtherLogTables()) {
|
||||
foreach ($logTable->getWaysToJoinToOtherLogTables() as $otherLogTable => $column) {
|
||||
if ($this->tables->hasJoinedTable($otherLogTable)) {
|
||||
$this->tables->addTableDependency($table, $otherLogTable);
|
||||
continue;
|
||||
}
|
||||
if ($this->tables->isTableJoinableOnVisit($otherLogTable) || $this->tables->isTableJoinableOnAction($otherLogTable)) {
|
||||
$this->addMissingTablesForOtherTableJoin($otherLogTable, $table);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($index > 0 && !$this->tables->hasJoinedTable($tableNameToJoin)) {
|
||||
$this->tables->addTableToJoin($tableNameToJoin);
|
||||
}
|
||||
|
||||
if ($this->tables->hasJoinedTable($tableNameToJoin)) {
|
||||
$this->generateNonVisitJoins($table, $tableNameToJoin, $index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($this->tables as $index => $table) {
|
||||
if (is_array($table)) {
|
||||
if (!isset($table['tableAlias'])) {
|
||||
$tableName = $table['table'];
|
||||
$numTables = count($this->tables);
|
||||
for ($j = $index + 1; $j < $numTables; $j++) {
|
||||
if (!isset($this->tables[$j])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$tableOther = $this->tables[$j];
|
||||
if (is_string($tableOther) && $tableOther === $tableName) {
|
||||
unset($this->tables[$j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
} elseif (is_string($table)) {
|
||||
$numTables = count($this->tables);
|
||||
|
||||
for ($j = $index + 1; $j < $numTables; $j++) {
|
||||
if (isset($this->tables[$j]) && is_array($this->tables[$j]) && !isset($this->tables[$j]['tableAlias'])) {
|
||||
$tableOther = $this->tables[$j];
|
||||
if ($table === $tableOther['table']) {
|
||||
$message = sprintf('Please reorganize the joined tables as the table %s in %s cannot be joined correctly. We recommend to join tables with arrays first. %s', $table, json_encode($this->tables), json_encode(debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 10)));
|
||||
throw new Exception($message);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private function addMissingTablesForOtherTableJoin($tableName, $dependentTable)
|
||||
{
|
||||
$this->tables->addTableDependency($dependentTable, $tableName);
|
||||
|
||||
if ($this->tables->hasJoinedTable($tableName)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$table = $this->tables->getLogTable($tableName);
|
||||
|
||||
if ($table->getColumnToJoinOnIdAction() || $table->getColumnToJoinOnIdAction() || $table->getLinkTableToBeAbleToJoinOnVisit()) {
|
||||
$this->tables->addTableToJoin($tableName);
|
||||
return;
|
||||
}
|
||||
|
||||
$otherTableJoins = $table->getWaysToJoinToOtherLogTables();
|
||||
|
||||
foreach ($otherTableJoins as $logTable => $column) {
|
||||
$this->addMissingTablesForOtherTableJoin($logTable, $tableName);
|
||||
}
|
||||
|
||||
$this->tables->addTableToJoin($tableName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the join sql based on the needed tables
|
||||
* @throws Exception if tables can't be joined
|
||||
* @return array
|
||||
*/
|
||||
public function generate()
|
||||
{
|
||||
/** @var LogTable[] $availableLogTables */
|
||||
$availableLogTables = array();
|
||||
|
||||
$this->tables->sort();
|
||||
|
||||
foreach ($this->tables as $i => $table) {
|
||||
if (is_array($table)) {
|
||||
|
||||
// join condition provided
|
||||
$alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table'];
|
||||
|
||||
if (isset($table['join'])) {
|
||||
$this->joinString .= ' ' . $table['join'];
|
||||
} else {
|
||||
$this->joinString .= ' LEFT JOIN';
|
||||
}
|
||||
|
||||
if (!isset($table['joinOn']) && $this->tables->getLogTable($table['table'])) {
|
||||
$logTable = $this->tables->getLogTable($table['table']);
|
||||
if (!empty($availableLogTables)) {
|
||||
$table['joinOn'] = $this->findJoinCriteriasForTables($logTable, $availableLogTables);
|
||||
}
|
||||
if (!isset($table['tableAlias'])) {
|
||||
// eg array('table' => 'log_link_visit_action', 'join' => 'RIGHT JOIN')
|
||||
// we treat this like a regular string table which we can join automatically
|
||||
$availableLogTables[$table['table']] = $logTable;
|
||||
}
|
||||
}
|
||||
|
||||
$this->joinString .= ' ' . Common::prefixTable($table['table']) . " AS " . $alias
|
||||
. " ON " . $table['joinOn'];
|
||||
continue;
|
||||
}
|
||||
|
||||
$tableSql = Common::prefixTable($table) . " AS $table";
|
||||
|
||||
$logTable = $this->tables->getLogTable($table);
|
||||
|
||||
if ($i == 0) {
|
||||
// first table
|
||||
$this->joinString .= $tableSql;
|
||||
} else {
|
||||
|
||||
$join = $this->findJoinCriteriasForTables($logTable, $availableLogTables);
|
||||
|
||||
if ($join === null) {
|
||||
$availableLogTables[$table] = $logTable;
|
||||
continue;
|
||||
}
|
||||
|
||||
// the join sql the default way
|
||||
$this->joinString .= " LEFT JOIN $tableSql ON " . $join;
|
||||
}
|
||||
|
||||
$availableLogTables[$table] = $logTable;
|
||||
}
|
||||
}
|
||||
|
||||
public function getJoinString()
|
||||
{
|
||||
return $this->joinString;
|
||||
}
|
||||
|
||||
public function shouldJoinWithSelect()
|
||||
{
|
||||
return $this->joinWithSubSelect;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param LogTable $logTable
|
||||
* @param LogTable[] $availableLogTables
|
||||
* @return string|null returns null in case the table is already joined, or the join string if the table needs
|
||||
* to be joined
|
||||
* @throws Exception if table cannot be joined for segmentation
|
||||
*/
|
||||
public function findJoinCriteriasForTables(LogTable $logTable, $availableLogTables)
|
||||
{
|
||||
$join = null;
|
||||
$alternativeJoin = null;
|
||||
$table = $logTable->getName();
|
||||
|
||||
foreach ($availableLogTables as $availableLogTable) {
|
||||
if ($logTable->getColumnToJoinOnIdVisit() && $availableLogTable->getColumnToJoinOnIdVisit()) {
|
||||
|
||||
$join = sprintf("%s.%s = %s.%s", $table, $logTable->getColumnToJoinOnIdVisit(),
|
||||
$availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit());
|
||||
$alternativeJoin = sprintf("%s.%s = %s.%s", $availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit(),
|
||||
$table, $logTable->getColumnToJoinOnIdVisit());
|
||||
|
||||
if ($availableLogTable->shouldJoinWithSubSelect()) {
|
||||
$this->joinWithSubSelect = true;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if ($logTable->getColumnToJoinOnIdAction() && $availableLogTable->getColumnToJoinOnIdAction()) {
|
||||
if (isset($this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()])) {
|
||||
$join = $this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()];
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
$otherJoins = $logTable->getWaysToJoinToOtherLogTables();
|
||||
foreach ($otherJoins as $joinTable => $column) {
|
||||
if($availableLogTable->getName() == $joinTable) {
|
||||
$join = sprintf("`%s`.`%s` = `%s`.`%s`", $table, $column, $availableLogTable->getName(), $column);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!isset($join)) {
|
||||
throw new Exception("Table '$table' can't be joined for segmentation");
|
||||
}
|
||||
|
||||
if ($this->tables->hasJoinedTableManually($table, $join)
|
||||
|| $this->tables->hasJoinedTableManually($table, $alternativeJoin)) {
|
||||
// already joined, no need to join it again
|
||||
return null;
|
||||
}
|
||||
|
||||
return $join;
|
||||
}
|
||||
|
||||
/**
|
||||
* This code is a bit tricky. We have to execute this right at the beginning before actually iterating over all the
|
||||
* tables and generating the join string as we may have to delete a table from the tables. If we did not delete
|
||||
* this table upfront, we would have maybe already added a joinString for that table, even though it will be later
|
||||
* removed by another table. This means if we wouldn't delete/unset that table upfront, we would need to alter
|
||||
* an already generated join string which would not be really nice code as well.
|
||||
*
|
||||
* Next problem is, because we are deleting a table, we have to remember the "joinOn" string for that table in a
|
||||
* property "nonVisitJoins". Otherwise we would not be able to generate the correct "joinOn" string when actually
|
||||
* iterating over all the tables to generate that string.
|
||||
*
|
||||
* @param $tableName
|
||||
* @param $tableNameToJoin
|
||||
* @param $index
|
||||
*/
|
||||
protected function generateNonVisitJoins($tableName, $tableNameToJoin, $index)
|
||||
{
|
||||
$logTable = $this->tables->getLogTable($tableName);
|
||||
$logTableToJoin = $this->tables->getLogTable($tableNameToJoin);
|
||||
|
||||
$nonVisitJoin = sprintf("%s.%s = %s.%s", $logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction(),
|
||||
$tableName, $logTable->getColumnToJoinOnIdAction());
|
||||
|
||||
$altNonVisitJoin = sprintf("%s.%s = %s.%s", $tableName, $logTable->getColumnToJoinOnIdAction(),
|
||||
$logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction());
|
||||
|
||||
if ($index > 0
|
||||
&& $this->tables->hasAddedTableManually($tableName)
|
||||
&& !$this->tables->hasJoinedTableManually($tableName, $nonVisitJoin)
|
||||
&& !$this->tables->hasJoinedTableManually($tableName, $altNonVisitJoin)) {
|
||||
$tableIndex = $this->tables->findIndexOfManuallyAddedTable($tableName);
|
||||
$nonVisitJoin = '(' . $this->tables[$tableIndex]['joinOn'] . ' AND ' . $nonVisitJoin . ')';
|
||||
unset($this->tables[$tableIndex]);
|
||||
}
|
||||
|
||||
if (!isset($this->nonVisitJoins[$tableName])) {
|
||||
$this->nonVisitJoins[$tableName] = array();
|
||||
}
|
||||
|
||||
if (!isset($this->nonVisitJoins[$tableNameToJoin])) {
|
||||
$this->nonVisitJoins[$tableNameToJoin] = array();
|
||||
}
|
||||
|
||||
$this->nonVisitJoins[$tableName][$tableNameToJoin] = $nonVisitJoin;
|
||||
$this->nonVisitJoins[$tableNameToJoin][$tableName] = $nonVisitJoin;
|
||||
}
|
||||
}
|
@ -0,0 +1,358 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Piwik\DataAccess\LogQueryBuilder;
|
||||
|
||||
use Exception;
|
||||
use Piwik\Plugin\LogTablesProvider;
|
||||
|
||||
class JoinTables extends \ArrayObject
|
||||
{
|
||||
/**
|
||||
* @var LogTablesProvider
|
||||
*/
|
||||
private $logTableProvider;
|
||||
|
||||
// NOTE: joins can be specified explicitly as arrays w/ 'joinOn' keys or implicitly as table names. when
|
||||
// table names are used, the joins dependencies are assumed based on how we want to order those joins.
|
||||
// the below table list the possible dependencies of each table, and is specifically designed to enforce
|
||||
// the following order:
|
||||
// log_link_visit_action, log_action, log_visit, log_conversion, log_conversion_item
|
||||
// which means if an array is supplied where log_visit comes before log_link_visitAction, it will
|
||||
// be moved to after it.
|
||||
private $implicitTableDependencies = [
|
||||
'log_link_visit_action' => [
|
||||
// empty
|
||||
],
|
||||
'log_action' => [
|
||||
'log_link_visit_action',
|
||||
'log_conversion',
|
||||
'log_conversion_item',
|
||||
'log_visit',
|
||||
],
|
||||
'log_visit' => [
|
||||
'log_link_visit_action',
|
||||
'log_action',
|
||||
],
|
||||
'log_conversion' => [
|
||||
'log_link_visit_action',
|
||||
'log_action',
|
||||
'log_visit',
|
||||
],
|
||||
'log_conversion_item' => [
|
||||
'log_link_visit_action',
|
||||
'log_action',
|
||||
'log_visit',
|
||||
'log_conversion',
|
||||
],
|
||||
];
|
||||
|
||||
/**
|
||||
* Tables constructor.
|
||||
* @param LogTablesProvider $logTablesProvider
|
||||
* @param array $tables
|
||||
*/
|
||||
public function __construct(LogTablesProvider $logTablesProvider, $tables)
|
||||
{
|
||||
$this->logTableProvider = $logTablesProvider;
|
||||
|
||||
foreach ($tables as $table) {
|
||||
$this->checkTableCanBeUsedForSegmentation($table);
|
||||
}
|
||||
|
||||
$this->exchangeArray(array_values($tables));
|
||||
}
|
||||
|
||||
public function getTables()
|
||||
{
|
||||
return $this->getArrayCopy();
|
||||
}
|
||||
|
||||
public function addTableToJoin($tableName)
|
||||
{
|
||||
$this->checkTableCanBeUsedForSegmentation($tableName);
|
||||
$this->append($tableName);
|
||||
}
|
||||
|
||||
public function hasJoinedTable($tableName)
|
||||
{
|
||||
$tables = in_array($tableName, $this->getTables());
|
||||
if ($tables) {
|
||||
return true;
|
||||
}
|
||||
|
||||
foreach ($this as $table) {
|
||||
if (is_array($table)) {
|
||||
if (!isset($table['tableAlias']) && $table['table'] === $table) {
|
||||
return true;
|
||||
} elseif (isset($table['tableAlias']) && $table['tableAlias'] === $table) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public function hasJoinedTableManually($tableToFind, $joinToFind)
|
||||
{
|
||||
foreach ($this as $table) {
|
||||
if (is_array($table)
|
||||
&& !empty($table['table'])
|
||||
&& $table['table'] === $tableToFind
|
||||
&& (!isset($table['tableAlias']) || $table['tableAlias'] === $tableToFind)
|
||||
&& (!isset($table['join']) || strtolower($table['join']) === 'left join')
|
||||
&& isset($table['joinOn']) && $table['joinOn'] === $joinToFind) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public function getLogTable($tableName)
|
||||
{
|
||||
return $this->logTableProvider->getLogTable($tableName);
|
||||
}
|
||||
|
||||
public function findIndexOfManuallyAddedTable($tableNameToFind)
|
||||
{
|
||||
foreach ($this as $index => $table) {
|
||||
if (is_array($table)
|
||||
&& !empty($table['table'])
|
||||
&& $table['table'] === $tableNameToFind
|
||||
&& (!isset($table['join']) || strtolower($table['join']) === 'left join')
|
||||
&& (!isset($table['tableAlias']) || $table['tableAlias'] === $tableNameToFind)) {
|
||||
return $index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public function hasAddedTableManually($tableToFind)
|
||||
{
|
||||
$table = $this->findIndexOfManuallyAddedTable($tableToFind);
|
||||
|
||||
return isset($table);
|
||||
}
|
||||
|
||||
public function sort()
|
||||
{
|
||||
// we do not use $this->uasort as we do not want to maintain keys
|
||||
$tables = $this->getTables();
|
||||
|
||||
// the first entry is always the FROM table
|
||||
$firstTable = array_shift($tables);
|
||||
|
||||
$dependencies = $this->parseDependencies($tables);
|
||||
|
||||
$sorted = [$firstTable];
|
||||
$this->visitTableListDfs($tables, $dependencies, function ($tableInfo) use (&$sorted) {
|
||||
$sorted[] = $tableInfo;
|
||||
});
|
||||
|
||||
$this->exchangeArray($sorted);
|
||||
}
|
||||
|
||||
public function isTableJoinableOnVisit($tableToCheck)
|
||||
{
|
||||
$table = $this->getLogTable($tableToCheck);
|
||||
|
||||
if (empty($table)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ($table->getColumnToJoinOnIdVisit()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ($table->getLinkTableToBeAbleToJoinOnVisit()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$otherWays = $table->getWaysToJoinToOtherLogTables();
|
||||
|
||||
if (empty($otherWays)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
foreach ($otherWays as $logTable => $column) {
|
||||
if ($logTable == 'log_visit' || $this->isTableJoinableOnVisit($logTable)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public function isTableJoinableOnAction($tableToCheck)
|
||||
{
|
||||
$table = $this->getLogTable($tableToCheck);
|
||||
|
||||
if (empty($table)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ($table->getColumnToJoinOnIdAction()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$otherWays = $table->getWaysToJoinToOtherLogTables();
|
||||
|
||||
if (empty($otherWays)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
foreach ($otherWays as $logTable => $column) {
|
||||
if ($logTable == 'log_action' || $this->isTableJoinableOnAction($logTable)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public function addTableDependency($table, $dependentTable)
|
||||
{
|
||||
if (!empty($this->implicitTableDependencies[$table])) {
|
||||
return;
|
||||
}
|
||||
|
||||
$this->implicitTableDependencies[$table] = [$dependentTable];
|
||||
}
|
||||
|
||||
private function checkTableCanBeUsedForSegmentation($tableName)
|
||||
{
|
||||
if (!is_array($tableName) && !$this->getLogTable($tableName)) {
|
||||
throw new Exception("Table '$tableName' can't be used for segmentation");
|
||||
}
|
||||
}
|
||||
|
||||
private function parseDependencies(array $tables)
|
||||
{
|
||||
$dependencies = [];
|
||||
foreach ($tables as $key => &$fromInfo) {
|
||||
if (is_string($fromInfo)) {
|
||||
$dependencies[$key] = $this->assumeImplicitJoinDependencies($tables, $fromInfo);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (empty($fromInfo['joinOn'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$table = isset($fromInfo['tableAlias']) ? $fromInfo['tableAlias'] : $fromInfo['table'];
|
||||
$tablesInExpr = $this->parseSqlTables($fromInfo['joinOn'], $table);
|
||||
$dependencies[$key] = $tablesInExpr;
|
||||
}
|
||||
return $dependencies;
|
||||
}
|
||||
|
||||
private function assumeImplicitJoinDependencies($allTablesToQuery, $table)
|
||||
{
|
||||
$implicitTableDependencies = $this->implicitTableDependencies;
|
||||
|
||||
$result = [];
|
||||
if (isset($implicitTableDependencies[$table])) {
|
||||
$result = $implicitTableDependencies[$table];
|
||||
|
||||
// only include dependencies that are in the list of requested tables (ie, if we want to
|
||||
// query from log_conversion joining on log_link_visit_action, we don't want to add log_visit
|
||||
// to the sql statement)
|
||||
$result = array_filter($result, function ($table) use ($allTablesToQuery) {
|
||||
return $this->isInTableArray($allTablesToQuery, $table);
|
||||
});
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
|
||||
private function isInTableArray($tables, $table)
|
||||
{
|
||||
foreach ($tables as $entry) {
|
||||
if (is_string($entry)
|
||||
&& $entry == $table
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (is_array($entry)
|
||||
&& $entry['table'] == $table
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private function parseSqlTables($joinOn, $self)
|
||||
{
|
||||
preg_match_all('/\b([a-zA-Z0-9_`]+)\.[a-zA-Z0-9_`]+\b/', $joinOn, $matches);
|
||||
|
||||
$tables = [];
|
||||
foreach ($matches[1] as $table) {
|
||||
if ($table === $self) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$tables[] = $table;
|
||||
}
|
||||
return $tables;
|
||||
}
|
||||
|
||||
private function visitTableListDfs($tables, $dependencies, $visitor)
|
||||
{
|
||||
$visited = [];
|
||||
foreach ($tables as $index => $tableInfo) {
|
||||
if (empty($visited[$index])) {
|
||||
$this->visitTableListDfsSingle($tables, $dependencies, $visitor, $index, $visited);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private function visitTableListDfsSingle($tables, $dependencies, $visitor, $tableToVisitIndex, &$visited)
|
||||
{
|
||||
$visited[$tableToVisitIndex] = true;
|
||||
$tableToVisit = $tables[$tableToVisitIndex];
|
||||
|
||||
if (!empty($dependencies[$tableToVisitIndex])) {
|
||||
foreach ($dependencies[$tableToVisitIndex] as $dependencyTableName) {
|
||||
$dependentTableToVisit = $this->findTableIndex($tables, $dependencyTableName);
|
||||
if ($dependentTableToVisit === null) { // sanity check, in case the dependent table is not in the list of tables to query
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!empty($visited[$dependentTableToVisit])) { // skip if already visited
|
||||
continue;
|
||||
}
|
||||
|
||||
// visit dependent table...
|
||||
$this->visitTableListDfsSingle($tables, $dependencies, $visitor, $dependentTableToVisit, $visited);
|
||||
}
|
||||
}
|
||||
|
||||
// ...then visit current table
|
||||
$visitor($tableToVisit);
|
||||
}
|
||||
|
||||
private function findTableIndex($tables, $tableToSearchFor)
|
||||
{
|
||||
foreach ($tables as $key => $info) {
|
||||
$tableName = null;
|
||||
if (is_string($info)) {
|
||||
$tableName = $info;
|
||||
} else if (is_array($info)) {
|
||||
$tableName = isset($info['tableAlias']) ? $info['tableAlias'] : $info['table'];
|
||||
}
|
||||
|
||||
if ($tableName == $tableToSearchFor) {
|
||||
return $key;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
359
msd2/tracking/piwik/core/DataAccess/Model.php
Normal file
359
msd2/tracking/piwik/core/DataAccess/Model.php
Normal file
@ -0,0 +1,359 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Exception;
|
||||
use Piwik\ArchiveProcessor\Rules;
|
||||
use Piwik\Common;
|
||||
use Piwik\Container\StaticContainer;
|
||||
use Piwik\Db;
|
||||
use Piwik\DbHelper;
|
||||
use Piwik\Period;
|
||||
use Piwik\Segment;
|
||||
use Piwik\Sequence;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
|
||||
* Cleans up outdated archives
|
||||
*
|
||||
* @package Piwik\DataAccess
|
||||
*/
|
||||
class Model
|
||||
{
|
||||
/**
|
||||
* @var LoggerInterface
|
||||
*/
|
||||
private $logger;
|
||||
|
||||
public function __construct(LoggerInterface $logger = null)
|
||||
{
|
||||
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the archives IDs that have already been invalidated and have been since re-processed.
|
||||
*
|
||||
* These archives { archive name (includes segment hash) , idsite, date, period } will be deleted.
|
||||
*
|
||||
* @param string $archiveTable
|
||||
* @param array $idSites
|
||||
* @return array
|
||||
* @throws Exception
|
||||
*/
|
||||
public function getInvalidatedArchiveIdsSafeToDelete($archiveTable, array $idSites)
|
||||
{
|
||||
try {
|
||||
Db::get()->query('SET SESSION group_concat_max_len=' . (128 * 1024));
|
||||
} catch (\Exception $ex) {
|
||||
$this->logger->info("Could not set group_concat_max_len MySQL session variable.");
|
||||
}
|
||||
|
||||
$idSites = array_map(function ($v) { return (int)$v; }, $idSites);
|
||||
|
||||
$sql = "SELECT idsite, date1, date2, period, name,
|
||||
GROUP_CONCAT(idarchive, '.', value ORDER BY ts_archived DESC) as archives
|
||||
FROM `$archiveTable`
|
||||
WHERE name LIKE 'done%'
|
||||
AND value IN (" . ArchiveWriter::DONE_INVALIDATED . ','
|
||||
. ArchiveWriter::DONE_OK . ','
|
||||
. ArchiveWriter::DONE_OK_TEMPORARY . ")
|
||||
AND idsite IN (" . implode(',', $idSites) . ")
|
||||
GROUP BY idsite, date1, date2, period, name";
|
||||
|
||||
$archiveIds = array();
|
||||
|
||||
$rows = Db::fetchAll($sql);
|
||||
foreach ($rows as $row) {
|
||||
$duplicateArchives = explode(',', $row['archives']);
|
||||
|
||||
$firstArchive = array_shift($duplicateArchives);
|
||||
list($firstArchiveId, $firstArchiveValue) = explode('.', $firstArchive);
|
||||
|
||||
// if the first archive (ie, the newest) is an 'ok' or 'ok temporary' archive, then
|
||||
// all invalidated archives after it can be deleted
|
||||
if ($firstArchiveValue == ArchiveWriter::DONE_OK
|
||||
|| $firstArchiveValue == ArchiveWriter::DONE_OK_TEMPORARY
|
||||
) {
|
||||
foreach ($duplicateArchives as $pair) {
|
||||
if (strpos($pair, '.') === false) {
|
||||
$this->logger->info("GROUP_CONCAT cut off the query result, you may have to purge archives again.");
|
||||
break;
|
||||
}
|
||||
|
||||
list($idarchive, $value) = explode('.', $pair);
|
||||
if ($value == ArchiveWriter::DONE_INVALIDATED) {
|
||||
$archiveIds[] = $idarchive;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $archiveIds;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $archiveTable Prefixed table name
|
||||
* @param int[] $idSites
|
||||
* @param string[][] $datesByPeriodType
|
||||
* @param Segment $segment
|
||||
* @return \Zend_Db_Statement
|
||||
* @throws Exception
|
||||
*/
|
||||
public function updateArchiveAsInvalidated($archiveTable, $idSites, $datesByPeriodType, Segment $segment = null)
|
||||
{
|
||||
$idSites = array_map('intval', $idSites);
|
||||
|
||||
$bind = array();
|
||||
|
||||
$periodConditions = array();
|
||||
foreach ($datesByPeriodType as $periodType => $dates) {
|
||||
$dateConditions = array();
|
||||
|
||||
foreach ($dates as $date) {
|
||||
$dateConditions[] = "(date1 <= ? AND ? <= date2)";
|
||||
$bind[] = $date;
|
||||
$bind[] = $date;
|
||||
}
|
||||
|
||||
$dateConditionsSql = implode(" OR ", $dateConditions);
|
||||
if (empty($periodType)
|
||||
|| $periodType == Period\Day::PERIOD_ID
|
||||
) {
|
||||
// invalidate all periods if no period supplied or period is day
|
||||
$periodConditions[] = "($dateConditionsSql)";
|
||||
} else if ($periodType == Period\Range::PERIOD_ID) {
|
||||
$periodConditions[] = "(period = " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))";
|
||||
} else {
|
||||
// for non-day periods, invalidate greater periods, but not range periods
|
||||
$periodConditions[] = "(period >= " . (int)$periodType . " AND period < " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))";
|
||||
}
|
||||
}
|
||||
|
||||
if ($segment) {
|
||||
$nameCondition = "name LIKE '" . Rules::getDoneFlagArchiveContainsAllPlugins($segment) . "%'";
|
||||
} else {
|
||||
$nameCondition = "name LIKE 'done%'";
|
||||
}
|
||||
|
||||
$sql = "UPDATE $archiveTable SET value = " . ArchiveWriter::DONE_INVALIDATED
|
||||
. " WHERE $nameCondition
|
||||
AND idsite IN (" . implode(", ", $idSites) . ")
|
||||
AND (" . implode(" OR ", $periodConditions) . ")";
|
||||
|
||||
return Db::query($sql, $bind);
|
||||
}
|
||||
|
||||
|
||||
public function getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan)
|
||||
{
|
||||
$query = "SELECT idarchive FROM " . $archiveTable . "
|
||||
WHERE name LIKE 'done%'
|
||||
AND (( value = " . ArchiveWriter::DONE_OK_TEMPORARY . "
|
||||
AND ts_archived < ?)
|
||||
OR value = " . ArchiveWriter::DONE_ERROR . ")";
|
||||
|
||||
return Db::fetchAll($query, array($purgeArchivesOlderThan));
|
||||
}
|
||||
|
||||
public function deleteArchivesWithPeriod($numericTable, $blobTable, $period, $date)
|
||||
{
|
||||
$query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?";
|
||||
$bind = array($period, $date);
|
||||
|
||||
$queryObj = Db::query(sprintf($query, $numericTable), $bind);
|
||||
$deletedRows = $queryObj->rowCount();
|
||||
|
||||
try {
|
||||
$queryObj = Db::query(sprintf($query, $blobTable), $bind);
|
||||
$deletedRows += $queryObj->rowCount();
|
||||
} catch (Exception $e) {
|
||||
// Individual blob tables could be missing
|
||||
$this->logger->debug("Unable to delete archives by period from {blobTable}.", array(
|
||||
'blobTable' => $blobTable,
|
||||
'exception' => $e,
|
||||
));
|
||||
}
|
||||
|
||||
return $deletedRows;
|
||||
}
|
||||
|
||||
public function deleteArchiveIds($numericTable, $blobTable, $idsToDelete)
|
||||
{
|
||||
$idsToDelete = array_values($idsToDelete);
|
||||
$query = "DELETE FROM %s WHERE idarchive IN (" . Common::getSqlStringFieldsArray($idsToDelete) . ")";
|
||||
|
||||
$queryObj = Db::query(sprintf($query, $numericTable), $idsToDelete);
|
||||
$deletedRows = $queryObj->rowCount();
|
||||
|
||||
try {
|
||||
$queryObj = Db::query(sprintf($query, $blobTable), $idsToDelete);
|
||||
$deletedRows += $queryObj->rowCount();
|
||||
} catch (Exception $e) {
|
||||
// Individual blob tables could be missing
|
||||
$this->logger->debug("Unable to delete archive IDs from {blobTable}.", array(
|
||||
'blobTable' => $blobTable,
|
||||
'exception' => $e,
|
||||
));
|
||||
}
|
||||
|
||||
return $deletedRows;
|
||||
}
|
||||
|
||||
public function getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, $doneFlags, $doneFlagValues)
|
||||
{
|
||||
$bindSQL = array($idSite,
|
||||
$dateStartIso,
|
||||
$dateEndIso,
|
||||
$period,
|
||||
);
|
||||
|
||||
$timeStampWhere = '';
|
||||
if ($minDatetimeIsoArchiveProcessedUTC) {
|
||||
$timeStampWhere = " AND ts_archived >= ? ";
|
||||
$bindSQL[] = $minDatetimeIsoArchiveProcessedUTC;
|
||||
}
|
||||
|
||||
$sqlWhereArchiveName = self::getNameCondition($doneFlags, $doneFlagValues);
|
||||
|
||||
$sqlQuery = "SELECT idarchive, value, name, date1 as startDate FROM $numericTable
|
||||
WHERE idsite = ?
|
||||
AND date1 = ?
|
||||
AND date2 = ?
|
||||
AND period = ?
|
||||
AND ( ($sqlWhereArchiveName)
|
||||
OR name = '" . ArchiveSelector::NB_VISITS_RECORD_LOOKED_UP . "'
|
||||
OR name = '" . ArchiveSelector::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "')
|
||||
$timeStampWhere
|
||||
ORDER BY idarchive DESC";
|
||||
$results = Db::fetchAll($sqlQuery, $bindSQL);
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
public function createArchiveTable($tableName, $tableNamePrefix)
|
||||
{
|
||||
$db = Db::get();
|
||||
$sql = DbHelper::getTableCreateSql($tableNamePrefix);
|
||||
|
||||
// replace table name template by real name
|
||||
$tableNamePrefix = Common::prefixTable($tableNamePrefix);
|
||||
$sql = str_replace($tableNamePrefix, $tableName, $sql);
|
||||
|
||||
try {
|
||||
$db->query($sql);
|
||||
} catch (Exception $e) {
|
||||
// accept mysql error 1050: table already exists, throw otherwise
|
||||
if (!$db->isErrNo($e, '1050')) {
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
if (ArchiveTableCreator::NUMERIC_TABLE === ArchiveTableCreator::getTypeFromTableName($tableName)) {
|
||||
$sequence = new Sequence($tableName);
|
||||
$sequence->create();
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
}
|
||||
}
|
||||
|
||||
public function allocateNewArchiveId($numericTable)
|
||||
{
|
||||
$sequence = new Sequence($numericTable);
|
||||
|
||||
try {
|
||||
$idarchive = $sequence->getNextId();
|
||||
} catch (Exception $e) {
|
||||
// edge case: sequence was not found, create it now
|
||||
$sequence->create();
|
||||
|
||||
$idarchive = $sequence->getNextId();
|
||||
}
|
||||
|
||||
return $idarchive;
|
||||
}
|
||||
|
||||
public function deletePreviousArchiveStatus($numericTable, $archiveId, $doneFlag)
|
||||
{
|
||||
$tableWithoutLeadingPrefix = $numericTable;
|
||||
$lenNumericTableWithoutPrefix = strlen('archive_numeric_MM_YYYY');
|
||||
|
||||
if (strlen($numericTable) >= $lenNumericTableWithoutPrefix) {
|
||||
$tableWithoutLeadingPrefix = substr($numericTable, strlen($numericTable) - $lenNumericTableWithoutPrefix);
|
||||
// we need to make sure lock name is less than 64 characters see https://github.com/piwik/piwik/issues/9131
|
||||
}
|
||||
$dbLockName = "rmPrevArchiveStatus.$tableWithoutLeadingPrefix.$archiveId";
|
||||
|
||||
// without advisory lock here, the DELETE would acquire Exclusive Lock
|
||||
$this->acquireArchiveTableLock($dbLockName);
|
||||
|
||||
Db::query("DELETE FROM $numericTable WHERE idarchive = ? AND (name = '" . $doneFlag . "')",
|
||||
array($archiveId)
|
||||
);
|
||||
|
||||
$this->releaseArchiveTableLock($dbLockName);
|
||||
}
|
||||
|
||||
public function insertRecord($tableName, $fields, $record, $name, $value)
|
||||
{
|
||||
// duplicate idarchives are Ignored, see https://github.com/piwik/piwik/issues/987
|
||||
$query = "INSERT IGNORE INTO " . $tableName . " (" . implode(", ", $fields) . ")
|
||||
VALUES (?,?,?,?,?,?,?,?) ON DUPLICATE KEY UPDATE " . end($fields) . " = ?";
|
||||
|
||||
$bindSql = $record;
|
||||
$bindSql[] = $name;
|
||||
$bindSql[] = $value;
|
||||
$bindSql[] = $value;
|
||||
|
||||
Db::query($query, $bindSql);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the site IDs for invalidated archives in an archive table.
|
||||
*
|
||||
* @param string $numericTable The numeric table to search through.
|
||||
* @return int[]
|
||||
*/
|
||||
public function getSitesWithInvalidatedArchive($numericTable)
|
||||
{
|
||||
$rows = Db::fetchAll("SELECT DISTINCT idsite FROM `$numericTable` WHERE name LIKE 'done%' AND value = " . ArchiveWriter::DONE_INVALIDATED);
|
||||
|
||||
$result = array();
|
||||
foreach ($rows as $row) {
|
||||
$result[] = $row['idsite'];
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the SQL condition used to find successfully completed archives that
|
||||
* this instance is querying for.
|
||||
*/
|
||||
private static function getNameCondition($doneFlags, $possibleValues)
|
||||
{
|
||||
$allDoneFlags = "'" . implode("','", $doneFlags) . "'";
|
||||
|
||||
// create the SQL to find archives that are DONE
|
||||
return "((name IN ($allDoneFlags)) AND (value IN (" . implode(',', $possibleValues) . ")))";
|
||||
}
|
||||
|
||||
protected function acquireArchiveTableLock($dbLockName)
|
||||
{
|
||||
if (Db::getDbLock($dbLockName, $maxRetries = 30) === false) {
|
||||
throw new Exception("Cannot get named lock $dbLockName.");
|
||||
}
|
||||
}
|
||||
|
||||
protected function releaseArchiveTableLock($dbLockName)
|
||||
{
|
||||
Db::releaseDbLock($dbLockName);
|
||||
}
|
||||
}
|
416
msd2/tracking/piwik/core/DataAccess/RawLogDao.php
Normal file
416
msd2/tracking/piwik/core/DataAccess/RawLogDao.php
Normal file
@ -0,0 +1,416 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*
|
||||
*/
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Piwik\Common;
|
||||
use Piwik\Container\StaticContainer;
|
||||
use Piwik\Db;
|
||||
use Piwik\Plugin\Dimension\DimensionMetadataProvider;
|
||||
use Piwik\Plugin\LogTablesProvider;
|
||||
|
||||
/**
|
||||
* DAO that queries log tables.
|
||||
*/
|
||||
class RawLogDao
|
||||
{
|
||||
const DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME = 'tmp_log_actions_to_keep';
|
||||
|
||||
/**
|
||||
* @var DimensionMetadataProvider
|
||||
*/
|
||||
private $dimensionMetadataProvider;
|
||||
|
||||
/**
|
||||
* @var LogTablesProvider
|
||||
*/
|
||||
private $logTablesProvider;
|
||||
|
||||
public function __construct(DimensionMetadataProvider $provider = null, LogTablesProvider $logTablesProvider = null)
|
||||
{
|
||||
$this->dimensionMetadataProvider = $provider ?: StaticContainer::get('Piwik\Plugin\Dimension\DimensionMetadataProvider');
|
||||
$this->logTablesProvider = $logTablesProvider ?: StaticContainer::get('Piwik\Plugin\LogTablesProvider');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $values
|
||||
* @param string $idVisit
|
||||
*/
|
||||
public function updateVisits(array $values, $idVisit)
|
||||
{
|
||||
$sql = "UPDATE " . Common::prefixTable('log_visit')
|
||||
. " SET " . $this->getColumnSetExpressions(array_keys($values))
|
||||
. " WHERE idvisit = ?";
|
||||
|
||||
$this->update($sql, $values, $idVisit);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $values
|
||||
* @param string $idVisit
|
||||
*/
|
||||
public function updateConversions(array $values, $idVisit)
|
||||
{
|
||||
$sql = "UPDATE " . Common::prefixTable('log_conversion')
|
||||
. " SET " . $this->getColumnSetExpressions(array_keys($values))
|
||||
. " WHERE idvisit = ?";
|
||||
|
||||
$this->update($sql, $values, $idVisit);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $from
|
||||
* @param string $to
|
||||
* @return int
|
||||
*/
|
||||
public function countVisitsWithDatesLimit($from, $to)
|
||||
{
|
||||
$sql = "SELECT COUNT(*) AS num_rows"
|
||||
. " FROM " . Common::prefixTable('log_visit')
|
||||
. " WHERE visit_last_action_time >= ? AND visit_last_action_time < ?";
|
||||
|
||||
$bind = array($from, $to);
|
||||
|
||||
return (int) Db::fetchOne($sql, $bind);
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterates over logs in a log table in chunks. Parameters to this function are as backend agnostic
|
||||
* as possible w/o dramatically increasing code complexity.
|
||||
*
|
||||
* @param string $logTable The log table name. Unprefixed, eg, `log_visit`.
|
||||
* @param array[] $conditions An array describing the conditions logs must match in the query. Translates to
|
||||
* the WHERE part of a SELECT statement. Each element must contain three elements:
|
||||
*
|
||||
* * the column name
|
||||
* * the operator (ie, '=', '<>', '<', etc.)
|
||||
* * the operand (ie, a value)
|
||||
*
|
||||
* The elements are AND-ed together.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* array(
|
||||
* array('visit_first_action_time', '>=', ...),
|
||||
* array('visit_first_action_time', '<', ...)
|
||||
* )
|
||||
* ```
|
||||
* @param int $iterationStep The number of rows to query at a time.
|
||||
* @param callable $callback The callback that processes each chunk of rows.
|
||||
*/
|
||||
public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback)
|
||||
{
|
||||
$idField = $this->getIdFieldForLogTable($logTable);
|
||||
list($query, $bind) = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep);
|
||||
|
||||
$lastId = 0;
|
||||
do {
|
||||
$rows = Db::fetchAll($query, array_merge(array($lastId), $bind));
|
||||
if (!empty($rows)) {
|
||||
$lastId = $rows[count($rows) - 1][$idField];
|
||||
|
||||
$callback($rows);
|
||||
}
|
||||
} while (count($rows) == $iterationStep);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes conversions for the supplied visit IDs from log_conversion. This method does not cascade, so
|
||||
* conversion items will not be deleted.
|
||||
*
|
||||
* @param int[] $visitIds
|
||||
* @return int The number of deleted rows.
|
||||
*/
|
||||
public function deleteFromLogTable($tableName, $visitIds)
|
||||
{
|
||||
$sql = "DELETE FROM `" . Common::prefixTable($tableName) . "` WHERE idvisit IN "
|
||||
. $this->getInFieldExpressionWithInts($visitIds);
|
||||
|
||||
$statement = Db::query($sql);
|
||||
return $statement->rowCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes conversion items for the supplied visit IDs from log_conversion_item.
|
||||
*
|
||||
* @param int[] $visitIds
|
||||
* @return int The number of deleted rows.
|
||||
*/
|
||||
public function deleteConversionItems($visitIds)
|
||||
{
|
||||
$sql = "DELETE FROM `" . Common::prefixTable('log_conversion_item') . "` WHERE idvisit IN "
|
||||
. $this->getInFieldExpressionWithInts($visitIds);
|
||||
|
||||
$statement = Db::query($sql);
|
||||
return $statement->rowCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes all unused entries from the log_action table. This method uses a temporary table to store used
|
||||
* actions, and then deletes rows from log_action that are not in this temporary table.
|
||||
*
|
||||
* Table locking is required to avoid concurrency issues.
|
||||
*
|
||||
* @throws \Exception If table locking permission is not granted to the current MySQL user.
|
||||
*/
|
||||
public function deleteUnusedLogActions()
|
||||
{
|
||||
if (!Db::isLockPrivilegeGranted()) {
|
||||
throw new \Exception("RawLogDao.deleteUnusedLogActions() requires table locking permission in order to complete without error.");
|
||||
}
|
||||
|
||||
// get current max ID in log tables w/ idaction references.
|
||||
$maxIds = $this->getMaxIdsInLogTables();
|
||||
|
||||
$this->createTempTableForStoringUsedActions();
|
||||
|
||||
// do large insert (inserting everything before maxIds) w/o locking tables...
|
||||
$this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true);
|
||||
|
||||
// ... then do small insert w/ locked tables to minimize the amount of time tables are locked.
|
||||
$this->lockLogTables();
|
||||
$this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false);
|
||||
|
||||
// delete before unlocking tables so there's no chance a new log row that references an
|
||||
// unused action will be inserted.
|
||||
$this->deleteUnusedActions();
|
||||
|
||||
Db::unlockAllTables();
|
||||
|
||||
$this->dropTempTableForStoringUsedActions();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of the website IDs that received some visits between the specified timestamp.
|
||||
*
|
||||
* @param string $fromDateTime
|
||||
* @param string $toDateTime
|
||||
* @return bool true if there are visits for this site between the given timeframe, false if not
|
||||
*/
|
||||
public function hasSiteVisitsBetweenTimeframe($fromDateTime, $toDateTime, $idSite)
|
||||
{
|
||||
$sites = Db::fetchOne("SELECT 1
|
||||
FROM " . Common::prefixTable('log_visit') . "
|
||||
WHERE idsite = ?
|
||||
AND visit_last_action_time > ?
|
||||
AND visit_last_action_time < ?
|
||||
LIMIT 1", array($idSite, $fromDateTime, $toDateTime));
|
||||
|
||||
return (bool) $sites;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $columnsToSet
|
||||
* @return string
|
||||
*/
|
||||
protected function getColumnSetExpressions(array $columnsToSet)
|
||||
{
|
||||
$columnsToSet = array_map(
|
||||
function ($column) {
|
||||
return $column . ' = ?';
|
||||
},
|
||||
$columnsToSet
|
||||
);
|
||||
|
||||
return implode(', ', $columnsToSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $values
|
||||
* @param $idVisit
|
||||
* @param $sql
|
||||
* @return \Zend_Db_Statement
|
||||
* @throws \Exception
|
||||
*/
|
||||
protected function update($sql, array $values, $idVisit)
|
||||
{
|
||||
return Db::query($sql, array_merge(array_values($values), array($idVisit)));
|
||||
}
|
||||
|
||||
protected function getIdFieldForLogTable($logTable)
|
||||
{
|
||||
$idColumns = $this->getTableIdColumns();
|
||||
|
||||
if (isset($idColumns[$logTable])) {
|
||||
return $idColumns[$logTable];
|
||||
}
|
||||
|
||||
throw new \InvalidArgumentException("Unknown log table '$logTable'.");
|
||||
}
|
||||
|
||||
// TODO: instead of creating a log query like this, we should re-use segments. to do this, however, there must be a 1-1
|
||||
// mapping for dimensions => segments, and each dimension should automatically have a segment.
|
||||
private function createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep)
|
||||
{
|
||||
$bind = array();
|
||||
|
||||
$sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE $idField > ?";
|
||||
|
||||
foreach ($conditions as $condition) {
|
||||
list($column, $operator, $value) = $condition;
|
||||
|
||||
if (is_array($value)) {
|
||||
$sql .= " AND $column IN (" . Common::getSqlStringFieldsArray($value) . ")";
|
||||
|
||||
$bind = array_merge($bind, $value);
|
||||
} else {
|
||||
$sql .= " AND $column $operator ?";
|
||||
|
||||
$bind[] = $value;
|
||||
}
|
||||
}
|
||||
|
||||
$sql .= " ORDER BY $idField ASC LIMIT " . (int)$iterationStep;
|
||||
|
||||
return array($sql, $bind);
|
||||
}
|
||||
|
||||
private function getInFieldExpressionWithInts($idVisits)
|
||||
{
|
||||
$sql = "(";
|
||||
|
||||
$isFirst = true;
|
||||
foreach ($idVisits as $idVisit) {
|
||||
if ($isFirst) {
|
||||
$isFirst = false;
|
||||
} else {
|
||||
$sql .= ', ';
|
||||
}
|
||||
|
||||
$sql .= (int)$idVisit;
|
||||
}
|
||||
|
||||
$sql .= ")";
|
||||
|
||||
return $sql;
|
||||
}
|
||||
|
||||
protected function getMaxIdsInLogTables()
|
||||
{
|
||||
$idColumns = $this->getTableIdColumns();
|
||||
$tables = array_keys($idColumns);
|
||||
|
||||
$result = array();
|
||||
foreach ($tables as $table) {
|
||||
$idCol = $idColumns[$table];
|
||||
$result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
private function createTempTableForStoringUsedActions()
|
||||
{
|
||||
$sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME) . " (
|
||||
idaction INT(11),
|
||||
PRIMARY KEY (idaction)
|
||||
)";
|
||||
Db::query($sql);
|
||||
}
|
||||
|
||||
private function dropTempTableForStoringUsedActions()
|
||||
{
|
||||
$sql = "DROP TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
|
||||
Db::query($sql);
|
||||
}
|
||||
|
||||
// protected for testing purposes
|
||||
protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000)
|
||||
{
|
||||
$tempTableName = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
|
||||
|
||||
$idColumns = $this->getTableIdColumns();
|
||||
foreach ($this->dimensionMetadataProvider->getActionReferenceColumnsByTable() as $table => $columns) {
|
||||
$idCol = $idColumns[$table];
|
||||
// Create select query for requesting ALL needed fields at once
|
||||
$sql = "SELECT " . implode(',' ,$columns) . " FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?";
|
||||
|
||||
if ($olderThan) {
|
||||
// Why start on zero? When running for a couple of months, this will generate about 10000+ queries with zero result. Use the lowest value instead.... saves a LOT of waiting time!
|
||||
$start = (int) Db::fetchOne("SELECT MIN($idCol) FROM " . Common::prefixTable($table));;
|
||||
$finish = $maxIds[$table];
|
||||
} else {
|
||||
$start = $maxIds[$table];
|
||||
$finish = (int) Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
|
||||
}
|
||||
// Borrowed from Db::segmentedFetchAll
|
||||
// Request records per $insertIntoTempIterationStep amount
|
||||
// Loop over the result set, mapping all numeric fields in a single insert query
|
||||
|
||||
// Insert query would be: INSERT IGNORE INTO [temp_table] VALUES (X),(Y),(Z) depending on the amount of fields requested per row
|
||||
for ($i = $start; $i <= $finish; $i += $insertIntoTempIterationStep) {
|
||||
$currentParams = array($i, $i + $insertIntoTempIterationStep);
|
||||
$result = Db::fetchAll($sql, $currentParams);
|
||||
// Now we loop over the result set of max $insertIntoTempIterationStep rows and create insert queries
|
||||
$keepValues = [];
|
||||
foreach ($result as $row) {
|
||||
$keepValues = array_merge($keepValues, array_filter(array_values($row), "is_numeric"));
|
||||
if (count($keepValues) >= 1000) {
|
||||
$insert = 'INSERT IGNORE INTO ' . $tempTableName .' VALUES (';
|
||||
$insert .= implode('),(', $keepValues);
|
||||
$insert .= ')';
|
||||
|
||||
Db::exec($insert);
|
||||
$keepValues = [];
|
||||
}
|
||||
}
|
||||
|
||||
$insert = 'INSERT IGNORE INTO ' . $tempTableName .' VALUES (';
|
||||
$insert .= implode('),(', $keepValues);
|
||||
$insert .= ')';
|
||||
|
||||
Db::exec($insert);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private function lockLogTables()
|
||||
{
|
||||
$tables = $this->getTableIdColumns();
|
||||
unset($tables['log_action']); // we write lock it
|
||||
$tableNames = array_keys($tables);
|
||||
|
||||
$readLocks = array();
|
||||
foreach ($tableNames as $tableName) {
|
||||
$readLocks[] = Common::prefixTable($tableName);
|
||||
}
|
||||
|
||||
Db::lockTables(
|
||||
$readLocks,
|
||||
$writeLocks = Common::prefixTables('log_action')
|
||||
);
|
||||
}
|
||||
|
||||
private function deleteUnusedActions()
|
||||
{
|
||||
list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
|
||||
|
||||
$deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable
|
||||
FROM $logActionTable
|
||||
LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction
|
||||
WHERE tmp.idaction IS NULL";
|
||||
|
||||
Db::query($deleteSql);
|
||||
}
|
||||
|
||||
protected function getTableIdColumns()
|
||||
{
|
||||
$columns = array();
|
||||
|
||||
foreach ($this->logTablesProvider->getAllLogTables() as $logTable) {
|
||||
$idColumn = $logTable->getIdColumn();
|
||||
|
||||
if (!empty($idColumn)) {
|
||||
$columns[$logTable->getName()] = $idColumn;
|
||||
}
|
||||
}
|
||||
|
||||
return $columns;
|
||||
}
|
||||
}
|
56
msd2/tracking/piwik/core/DataAccess/TableMetadata.php
Normal file
56
msd2/tracking/piwik/core/DataAccess/TableMetadata.php
Normal file
@ -0,0 +1,56 @@
|
||||
<?php
|
||||
/**
|
||||
* Piwik - free/libre analytics platform
|
||||
*
|
||||
* @link http://piwik.org
|
||||
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
||||
*/
|
||||
namespace Piwik\DataAccess;
|
||||
|
||||
use Piwik\Db;
|
||||
|
||||
/**
|
||||
* Data Access Object that can be used to get metadata information about
|
||||
* the MySQL tables Piwik uses.
|
||||
*/
|
||||
class TableMetadata
|
||||
{
|
||||
/**
|
||||
* Returns the list of column names for a table.
|
||||
*
|
||||
* @param string $table Prefixed table name.
|
||||
* @return string[] List of column names..
|
||||
*/
|
||||
public function getColumns($table)
|
||||
{
|
||||
$table = str_replace("`", "", $table);
|
||||
|
||||
$columns = Db::fetchAll("SHOW COLUMNS FROM `" . $table . "`");
|
||||
|
||||
$columnNames = array();
|
||||
foreach ($columns as $column) {
|
||||
$columnNames[] = $column['Field'];
|
||||
}
|
||||
|
||||
return $columnNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of idaction columns in a table. A column is
|
||||
* assumed to be an idaction reference if it has `"idaction"` in its
|
||||
* name (eg, `"idaction_url"` or `"idaction_content_name"`.
|
||||
*
|
||||
* @param string $table Prefixed table name.
|
||||
* @return string[]
|
||||
*/
|
||||
public function getIdActionColumnNames($table)
|
||||
{
|
||||
$columns = $this->getColumns($table);
|
||||
|
||||
$columns = array_filter($columns, function ($columnName) {
|
||||
return strpos($columnName, 'idaction') !== false;
|
||||
});
|
||||
|
||||
return array_values($columns);
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user