1988 lines
72 KiB
PHP
1988 lines
72 KiB
PHP
<?php
|
|
/**
|
|
* Piwik - free/libre analytics platform
|
|
*
|
|
* @link http://piwik.org
|
|
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
|
*
|
|
*/
|
|
namespace Piwik;
|
|
|
|
use Exception;
|
|
use Piwik\ArchiveProcessor\PluginsArchiver;
|
|
use Piwik\ArchiveProcessor\Rules;
|
|
use Piwik\Archiver\Request;
|
|
use Piwik\CliMulti\Process;
|
|
use Piwik\Container\StaticContainer;
|
|
use Piwik\CronArchive\FixedSiteIds;
|
|
use Piwik\CronArchive\Performance\Logger;
|
|
use Piwik\CronArchive\SharedSiteIds;
|
|
use Piwik\Archive\ArchiveInvalidator;
|
|
use Piwik\DataAccess\RawLogDao;
|
|
use Piwik\Exception\UnexpectedWebsiteFoundException;
|
|
use Piwik\Metrics\Formatter;
|
|
use Piwik\Period\Factory as PeriodFactory;
|
|
use Piwik\CronArchive\SitesToReprocessDistributedList;
|
|
use Piwik\CronArchive\SegmentArchivingRequestUrlProvider;
|
|
use Piwik\Plugins\CoreAdminHome\API as CoreAdminHomeAPI;
|
|
use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel;
|
|
use Piwik\Plugins\SitesManager\API as APISitesManager;
|
|
use Piwik\Plugins\UsersManager\API as APIUsersManager;
|
|
use Piwik\Plugins\UsersManager\UserPreferences;
|
|
use Psr\Log\LoggerInterface;
|
|
|
|
/**
|
|
* ./console core:archive runs as a cron and is a useful tool for general maintenance,
|
|
* and pre-process reports for a Fast dashboard rendering.
|
|
*/
|
|
class CronArchive
|
|
{
|
|
// the url can be set here before the init, and it will be used instead of --url=
|
|
public static $url = false;
|
|
|
|
// Max parallel requests for a same site's segments
|
|
const MAX_CONCURRENT_API_REQUESTS = 3;
|
|
|
|
// force-timeout-for-periods default (1 hour)
|
|
const SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES = 3600;
|
|
|
|
// force-all-periods default (7 days)
|
|
const ARCHIVE_SITES_WITH_TRAFFIC_SINCE = 604800;
|
|
|
|
// By default, will process last 52 days and months
|
|
// It will be overwritten by the number of days since last archiving ran until completion.
|
|
const DEFAULT_DATE_LAST = 52;
|
|
|
|
// Since weeks are not used in yearly archives, we make sure that all possible weeks are processed
|
|
const DEFAULT_DATE_LAST_WEEKS = 260;
|
|
|
|
const DEFAULT_DATE_LAST_YEARS = 7;
|
|
|
|
// Flag to know when the archive cron is calling the API
|
|
const APPEND_TO_API_REQUEST = '&trigger=archivephp';
|
|
|
|
// Flag used to record timestamp in Option::
|
|
const OPTION_ARCHIVING_FINISHED_TS = "LastCompletedFullArchiving";
|
|
|
|
// Name of option used to store starting timestamp
|
|
const OPTION_ARCHIVING_STARTED_TS = "LastFullArchivingStartTime";
|
|
|
|
// Show only first N characters from Piwik API output in case of errors
|
|
const TRUNCATE_ERROR_MESSAGE_SUMMARY = 6000;
|
|
|
|
// archiving will be triggered on all websites with traffic in the last $shouldArchiveOnlySitesWithTrafficSince seconds
|
|
private $shouldArchiveOnlySitesWithTrafficSince;
|
|
|
|
// By default, we only process the current week/month/year at most once an hour
|
|
private $processPeriodsMaximumEverySeconds;
|
|
private $todayArchiveTimeToLive;
|
|
private $websiteDayHasFinishedSinceLastRun = array();
|
|
private $idSitesInvalidatedOldReports = array();
|
|
private $shouldArchiveOnlySpecificPeriods = array();
|
|
private $idSitesNotUsingTracker;
|
|
|
|
/**
|
|
* @var SharedSiteIds|FixedSiteIds
|
|
*/
|
|
private $websites = array();
|
|
private $allWebsites = array();
|
|
private $segments = array();
|
|
private $visitsToday = 0;
|
|
private $requests = 0;
|
|
private $archiveAndRespectTTL = true;
|
|
|
|
private $lastSuccessRunTimestamp = false;
|
|
private $errors = array();
|
|
|
|
private $apiToInvalidateArchivedReport;
|
|
|
|
const NO_ERROR = "no error";
|
|
|
|
public $testmode = false;
|
|
|
|
/**
|
|
* The list of IDs for sites for whom archiving should be initiated. If supplied, only these
|
|
* sites will be archived.
|
|
*
|
|
* @var int[]
|
|
*/
|
|
public $shouldArchiveSpecifiedSites = array();
|
|
|
|
/**
|
|
* The list of IDs of sites to ignore when launching archiving. Archiving will not be launched
|
|
* for any site whose ID is in this list (even if the ID is supplied in {@link $shouldArchiveSpecifiedSites}
|
|
* or if {@link $shouldArchiveAllSites} is true).
|
|
*
|
|
* @var int[]
|
|
*/
|
|
public $shouldSkipSpecifiedSites = array();
|
|
|
|
/**
|
|
* If true, archiving will be launched for every site.
|
|
*
|
|
* @var bool
|
|
*/
|
|
public $shouldArchiveAllSites = false;
|
|
|
|
/**
|
|
* If true, xhprof will be initiated for the archiving run. Only for development/testing.
|
|
*
|
|
* @var bool
|
|
*/
|
|
public $shouldStartProfiler = false;
|
|
|
|
/**
|
|
* Given options will be forwarded to the PHP command if the archiver is executed via CLI.
|
|
* @var string
|
|
*/
|
|
public $phpCliConfigurationOptions = '';
|
|
|
|
/**
|
|
* If HTTP requests are used to initiate archiving, this controls whether invalid SSL certificates should
|
|
* be accepted or not by each request.
|
|
*
|
|
* @var bool
|
|
*/
|
|
public $acceptInvalidSSLCertificate = false;
|
|
|
|
/**
|
|
* If set to true, scheduled tasks will not be run.
|
|
*
|
|
* @var bool
|
|
*/
|
|
public $disableScheduledTasks = false;
|
|
|
|
/**
|
|
* The amount of seconds between non-day period archiving. That is, if archiving has been launched within
|
|
* the past [$forceTimeoutPeriod] seconds, Piwik will not initiate archiving for week, month and year periods.
|
|
*
|
|
* @var int|false
|
|
*/
|
|
public $forceTimeoutPeriod = false;
|
|
|
|
/**
|
|
* If supplied, archiving will be launched for sites that have had visits within the last [$shouldArchiveAllPeriodsSince]
|
|
* seconds. If set to `true`, the value defaults to {@link ARCHIVE_SITES_WITH_TRAFFIC_SINCE}.
|
|
*
|
|
* @var int|bool
|
|
*/
|
|
public $shouldArchiveAllPeriodsSince = false;
|
|
|
|
/**
|
|
* If supplied, archiving will be launched only for periods that fall within this date range. For example,
|
|
* `"2012-01-01,2012-03-15"` would result in January 2012, February 2012 being archived but not April 2012.
|
|
*
|
|
* @var string|false eg, `"2012-01-01,2012-03-15"`
|
|
*/
|
|
public $restrictToDateRange = false;
|
|
|
|
/**
|
|
* A list of periods to launch archiving for. By default, day, week, month and year periods
|
|
* are considered. This variable can limit the periods to, for example, week & month only.
|
|
*
|
|
* @var string[] eg, `array("day","week","month","year")`
|
|
*/
|
|
public $restrictToPeriods = array();
|
|
|
|
/**
|
|
* Forces CronArchive to retrieve data for the last [$dateLastForced] periods when initiating archiving.
|
|
* When archiving weeks, for example, if 10 is supplied, the API will be called w/ last10. This will potentially
|
|
* initiate archiving for the last 10 weeks.
|
|
*
|
|
* @var int|false
|
|
*/
|
|
public $dateLastForced = false;
|
|
|
|
/**
|
|
* The number of concurrent requests to issue per website. Defaults to {@link MAX_CONCURRENT_API_REQUESTS}.
|
|
*
|
|
* Used when archiving a site's segments concurrently.
|
|
*
|
|
* @var int|false
|
|
*/
|
|
public $concurrentRequestsPerWebsite = false;
|
|
|
|
/**
|
|
* The number of concurrent archivers to run at once max.
|
|
*
|
|
* @var int|false
|
|
*/
|
|
public $maxConcurrentArchivers = false;
|
|
|
|
/**
|
|
* List of segment strings to force archiving for. If a stored segment is not in this list, it will not
|
|
* be archived.
|
|
*
|
|
* @var string[]
|
|
*/
|
|
public $segmentsToForce = array();
|
|
|
|
/**
|
|
* @var bool
|
|
*/
|
|
public $disableSegmentsArchiving = false;
|
|
|
|
private $websitesWithVisitsSinceLastRun = 0;
|
|
private $skippedPeriodsArchivesWebsite = 0;
|
|
private $skippedPeriodsNoDataInPeriod = 0;
|
|
private $skippedDayArchivesWebsites = 0;
|
|
private $skippedDayNoRecentData = 0;
|
|
private $skippedDayOnApiError = 0;
|
|
private $skipped = 0;
|
|
private $processed = 0;
|
|
private $archivedPeriodsArchivesWebsite = 0;
|
|
|
|
private $archivingStartingTime;
|
|
|
|
private $formatter;
|
|
|
|
/**
|
|
* @var SegmentArchivingRequestUrlProvider
|
|
*/
|
|
private $segmentArchivingRequestUrlProvider;
|
|
|
|
/**
|
|
* @var LoggerInterface
|
|
*/
|
|
private $logger;
|
|
|
|
/**
|
|
* Only used when archiving using HTTP requests.
|
|
*
|
|
* @var string
|
|
*/
|
|
private $urlToPiwik = null;
|
|
|
|
/**
|
|
* @var ArchiveInvalidator
|
|
*/
|
|
private $invalidator;
|
|
|
|
/**
|
|
* @var bool
|
|
*/
|
|
private $isArchiveProfilingEnabled = false;
|
|
|
|
/**
|
|
* Returns the option name of the option that stores the time core:archive was last executed.
|
|
*
|
|
* @param int $idSite
|
|
* @param string $period
|
|
* @return string
|
|
*/
|
|
public static function lastRunKey($idSite, $period)
|
|
{
|
|
return "lastRunArchive" . $period . "_" . $idSite;
|
|
}
|
|
|
|
/**
|
|
* Constructor.
|
|
*
|
|
* @param string|null $processNewSegmentsFrom When to archive new segments from. See [General] process_new_segments_from
|
|
* for possible values.
|
|
* @param LoggerInterface|null $logger
|
|
*/
|
|
public function __construct($processNewSegmentsFrom = null, LoggerInterface $logger = null)
|
|
{
|
|
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
|
|
$this->formatter = new Formatter();
|
|
|
|
$processNewSegmentsFrom = $processNewSegmentsFrom ?: StaticContainer::get('ini.General.process_new_segments_from');
|
|
$this->segmentArchivingRequestUrlProvider = new SegmentArchivingRequestUrlProvider($processNewSegmentsFrom);
|
|
|
|
$this->invalidator = StaticContainer::get('Piwik\Archive\ArchiveInvalidator');
|
|
|
|
$this->isArchiveProfilingEnabled = Config::getInstance()->Debug['archiving_profile'] == 1;
|
|
}
|
|
|
|
private function isMaintenanceModeEnabled()
|
|
{
|
|
return Config::getInstance()->General['maintenance_mode'] == 1;
|
|
}
|
|
|
|
/**
|
|
* Initializes and runs the cron archiver.
|
|
*/
|
|
public function main()
|
|
{
|
|
if ($this->isMaintenanceModeEnabled()) {
|
|
$this->logger->info("Archiving won't run because maintenance mode is enabled");
|
|
return;
|
|
}
|
|
|
|
$self = $this;
|
|
Access::doAsSuperUser(function () use ($self) {
|
|
$self->init();
|
|
$self->run();
|
|
$self->runScheduledTasks();
|
|
$self->end();
|
|
});
|
|
}
|
|
|
|
public function init()
|
|
{
|
|
/**
|
|
* This event is triggered during initializing archiving.
|
|
*
|
|
* @param CronArchive $this
|
|
*/
|
|
Piwik::postEvent('CronArchive.init.start', array($this));
|
|
|
|
SettingsServer::setMaxExecutionTime(0);
|
|
|
|
$this->archivingStartingTime = time();
|
|
|
|
// Note: the order of methods call matters here.
|
|
$this->initStateFromParameters();
|
|
|
|
$this->logInitInfo();
|
|
$this->logArchiveTimeoutInfo();
|
|
|
|
// record archiving start time
|
|
Option::set(self::OPTION_ARCHIVING_STARTED_TS, time());
|
|
|
|
$this->segments = $this->initSegmentsToArchive();
|
|
$this->allWebsites = APISitesManager::getInstance()->getAllSitesId();
|
|
|
|
if (!empty($this->shouldArchiveOnlySpecificPeriods)) {
|
|
$this->logger->info("- Will only process the following periods: " . implode(", ", $this->shouldArchiveOnlySpecificPeriods) . " (--force-periods)");
|
|
}
|
|
|
|
$this->invalidateArchivedReportsForSitesThatNeedToBeArchivedAgain();
|
|
|
|
$websitesIds = $this->initWebsiteIds();
|
|
$this->filterWebsiteIds($websitesIds);
|
|
|
|
$this->websites = $this->createSitesToArchiveQueue($websitesIds);
|
|
|
|
if ($this->websites->getInitialSiteIds() != $websitesIds) {
|
|
$this->logger->info('Will ignore websites and help finish a previous started queue instead. IDs: ' . implode(', ', $this->websites->getInitialSiteIds()));
|
|
}
|
|
|
|
$this->logForcedSegmentInfo();
|
|
|
|
/**
|
|
* This event is triggered after a CronArchive instance is initialized.
|
|
*
|
|
* @param array $websiteIds The list of website IDs this CronArchive instance is processing.
|
|
* This will be the entire list of IDs regardless of whether some have
|
|
* already been processed.
|
|
*/
|
|
Piwik::postEvent('CronArchive.init.finish', array($this->websites->getInitialSiteIds()));
|
|
}
|
|
|
|
/**
|
|
* Main function, runs archiving on all websites with new activity
|
|
*/
|
|
public function run()
|
|
{
|
|
$timer = new Timer;
|
|
|
|
$this->logSection("START");
|
|
$this->logger->info("Starting Matomo reports archiving...");
|
|
|
|
$numWebsitesScheduled = $this->websites->getNumSites();
|
|
$numWebsitesArchived = 0;
|
|
|
|
$cliMulti = $this->makeCliMulti();
|
|
if ($this->maxConcurrentArchivers && $cliMulti->supportsAsync()) {
|
|
$numRunning = 0;
|
|
$processes = Process::getListOfRunningProcesses();
|
|
$instanceId = SettingsPiwik::getPiwikInstanceId();
|
|
|
|
foreach ($processes as $process) {
|
|
if (strpos($process, 'console core:archive') !== false &&
|
|
(!$instanceId
|
|
|| strpos($process, '--matomo-domain=' . $instanceId) !== false
|
|
|| strpos($process, '--matomo-domain="' . $instanceId . '"') !== false
|
|
|| strpos($process, '--matomo-domain=\'' . $instanceId . "'") !== false
|
|
|| strpos($process, '--piwik-domain=' . $instanceId) !== false
|
|
|| strpos($process, '--piwik-domain="' . $instanceId . '"') !== false
|
|
|| strpos($process, '--piwik-domain=\'' . $instanceId . "'") !== false)) {
|
|
$numRunning++;
|
|
}
|
|
}
|
|
if ($this->maxConcurrentArchivers < $numRunning) {
|
|
$this->logger->info(sprintf("Archiving will stop now because %s archivers are already running and max %s are supposed to run at once.", $numRunning, $this->maxConcurrentArchivers));
|
|
return;
|
|
} else {
|
|
$this->logger->info(sprintf("%s out of %s archivers running currently", $numRunning, $this->maxConcurrentArchivers));
|
|
}
|
|
}
|
|
|
|
do {
|
|
if ($this->isMaintenanceModeEnabled()) {
|
|
$this->logger->info("Archiving will stop now because maintenance mode is enabled");
|
|
return;
|
|
}
|
|
|
|
$idSite = $this->websites->getNextSiteId();
|
|
$numWebsitesArchived++;
|
|
|
|
if (null === $idSite) {
|
|
break;
|
|
}
|
|
|
|
if ($numWebsitesArchived > $numWebsitesScheduled) {
|
|
// this is needed because a cron:archive might run for example for 5 hours. Meanwhile 5 other
|
|
// `cron:archive` have been possibly started... this means meanwhile, within the 5 hours, the
|
|
// `list of SharedSiteIds` have been potentially emptied and filled again from the beginning.
|
|
// This means 5 hours later, even though all websites that were originally in the list have been
|
|
// finished by now, the `cron:archive` will stay active and continue processing because the list of
|
|
// siteIds to archive was resetted by another `cron:archive` command. Potentially some `cron:archive`
|
|
// will basically never end because by the time the `cron:archive` finishes, the sharedSideIds have
|
|
// been resettet. This can eventually lead to some random concurrency issues when there are like
|
|
// 40 `core:archive` active at the same time.
|
|
$this->logger->info("Stopping archiving as the initial list of websites has been processed.");
|
|
return;
|
|
}
|
|
|
|
flush();
|
|
$requestsBefore = $this->requests;
|
|
if ($idSite <= 0) {
|
|
continue;
|
|
}
|
|
|
|
$skipWebsiteForced = in_array($idSite, $this->shouldSkipSpecifiedSites);
|
|
if ($skipWebsiteForced) {
|
|
$this->logger->info("Skipped website id $idSite, found in --skip-idsites ");
|
|
$this->skipped++;
|
|
continue;
|
|
}
|
|
|
|
$shouldCheckIfArchivingIsNeeded = !$this->shouldArchiveSpecifiedSites && !$this->shouldArchiveAllSites && !$this->dateLastForced;
|
|
$hasWebsiteDayFinishedSinceLastRun = in_array($idSite, $this->websiteDayHasFinishedSinceLastRun);
|
|
$isOldReportInvalidatedForWebsite = $this->isOldReportInvalidatedForWebsite($idSite);
|
|
|
|
if ($shouldCheckIfArchivingIsNeeded) {
|
|
// if not specific sites and not all websites should be archived, we check whether we actually have
|
|
// to process the archives for this website (only if there were visits since midnight)
|
|
if (!$hasWebsiteDayFinishedSinceLastRun && !$isOldReportInvalidatedForWebsite) {
|
|
|
|
if ($this->isWebsiteUsingTheTracker($idSite)) {
|
|
|
|
if(!$this->hadWebsiteTrafficSinceMidnightInTimezone($idSite)) {
|
|
$this->logger->info("Skipped website id $idSite as archiving is not needed");
|
|
|
|
$this->skippedDayNoRecentData++;
|
|
$this->skipped++;
|
|
continue;
|
|
}
|
|
} else {
|
|
$this->logger->info("- website id $idSite is not using the tracker");
|
|
}
|
|
|
|
} elseif ($hasWebsiteDayFinishedSinceLastRun) {
|
|
$this->logger->info("Day has finished for website id $idSite since last run");
|
|
} elseif ($isOldReportInvalidatedForWebsite) {
|
|
$this->logger->info("Old report was invalidated for website id $idSite");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This event is triggered before the cron archiving process starts archiving data for a single
|
|
* site.
|
|
*
|
|
* @param int $idSite The ID of the site we're archiving data for.
|
|
*/
|
|
Piwik::postEvent('CronArchive.archiveSingleSite.start', array($idSite));
|
|
|
|
$completed = $this->archiveSingleSite($idSite, $requestsBefore);
|
|
|
|
/**
|
|
* This event is triggered immediately after the cron archiving process starts archiving data for a single
|
|
* site.
|
|
*
|
|
* @param int $idSite The ID of the site we're archiving data for.
|
|
*/
|
|
Piwik::postEvent('CronArchive.archiveSingleSite.finish', array($idSite, $completed));
|
|
} while (!empty($idSite));
|
|
|
|
$this->logger->info("Done archiving!");
|
|
|
|
$this->logSection("SUMMARY");
|
|
$this->logger->info("Total visits for today across archived websites: " . $this->visitsToday);
|
|
|
|
$totalWebsites = count($this->allWebsites);
|
|
$this->skipped = $totalWebsites - $this->websitesWithVisitsSinceLastRun;
|
|
$this->logger->info("Archived today's reports for {$this->websitesWithVisitsSinceLastRun} websites");
|
|
$this->logger->info("Archived week/month/year for {$this->archivedPeriodsArchivesWebsite} websites");
|
|
$this->logger->info("Skipped {$this->skipped} websites");
|
|
$this->logger->info("- {$this->skippedDayNoRecentData} skipped because no new visit since the last script execution");
|
|
$this->logger->info("- {$this->skippedDayArchivesWebsites} skipped because existing daily reports are less than {$this->todayArchiveTimeToLive} seconds old");
|
|
$this->logger->info("- {$this->skippedPeriodsArchivesWebsite} skipped because existing week/month/year periods reports are less than {$this->processPeriodsMaximumEverySeconds} seconds old");
|
|
|
|
if($this->skippedPeriodsNoDataInPeriod) {
|
|
$this->logger->info("- {$this->skippedPeriodsNoDataInPeriod} skipped periods archiving because no visit in recent days");
|
|
}
|
|
|
|
if($this->skippedDayOnApiError) {
|
|
$this->logger->info("- {$this->skippedDayOnApiError} skipped because got an error while querying reporting API");
|
|
}
|
|
$this->logger->info("Total API requests: {$this->requests}");
|
|
|
|
//DONE: done/total, visits, wtoday, wperiods, reqs, time, errors[count]: first eg.
|
|
$percent = $this->websites->getNumSites() == 0
|
|
? ""
|
|
: " " . round($this->processed * 100 / $this->websites->getNumSites(), 0) . "%";
|
|
$this->logger->info("done: " .
|
|
$this->processed . "/" . $this->websites->getNumSites() . "" . $percent . ", " .
|
|
$this->visitsToday . " vtoday, $this->websitesWithVisitsSinceLastRun wtoday, {$this->archivedPeriodsArchivesWebsite} wperiods, " .
|
|
$this->requests . " req, " . round($timer->getTimeMs()) . " ms, " .
|
|
(empty($this->errors)
|
|
? self::NO_ERROR
|
|
: (count($this->errors) . " errors."))
|
|
);
|
|
|
|
$this->logger->info($timer->__toString());
|
|
}
|
|
|
|
/**
|
|
* End of the script
|
|
*/
|
|
public function end()
|
|
{
|
|
/**
|
|
* This event is triggered after archiving.
|
|
*
|
|
* @param CronArchive $this
|
|
*/
|
|
Piwik::postEvent('CronArchive.end', array($this));
|
|
|
|
if (empty($this->errors)) {
|
|
// No error -> Logs the successful script execution until completion
|
|
Option::set(self::OPTION_ARCHIVING_FINISHED_TS, time());
|
|
return;
|
|
}
|
|
|
|
$this->logSection("SUMMARY OF ERRORS");
|
|
foreach ($this->errors as $error) {
|
|
// do not logError since errors are already in stderr
|
|
$this->logger->info("Error: " . $error);
|
|
}
|
|
|
|
$summary = count($this->errors) . " total errors during this script execution, please investigate and try and fix these errors.";
|
|
$this->logFatalError($summary);
|
|
}
|
|
|
|
public function logFatalError($m)
|
|
{
|
|
$this->logError($m);
|
|
|
|
throw new Exception($m);
|
|
}
|
|
|
|
/**
|
|
* @param int[] $idSegments
|
|
*/
|
|
public function setSegmentsToForceFromSegmentIds($idSegments)
|
|
{
|
|
/** @var SegmentEditorModel $segmentEditorModel */
|
|
$segmentEditorModel = StaticContainer::get('Piwik\Plugins\SegmentEditor\Model');
|
|
$segments = $segmentEditorModel->getAllSegmentsAndIgnoreVisibility();
|
|
|
|
$segments = array_filter($segments, function ($segment) use ($idSegments) {
|
|
return in_array($segment['idsegment'], $idSegments);
|
|
});
|
|
|
|
$segments = array_map(function ($segment) {
|
|
return $segment['definition'];
|
|
}, $segments);
|
|
|
|
$this->segmentsToForce = $segments;
|
|
}
|
|
|
|
public function runScheduledTasks()
|
|
{
|
|
$this->logSection("SCHEDULED TASKS");
|
|
|
|
if ($this->disableScheduledTasks) {
|
|
$this->logger->info("Scheduled tasks are disabled with --disable-scheduled-tasks");
|
|
return;
|
|
}
|
|
|
|
// TODO: this is a HACK to get the purgeOutdatedArchives task to work when run below. without
|
|
// it, the task will not run because we no longer run the tasks through CliMulti.
|
|
// harder to implement alternatives include:
|
|
// - moving CronArchive logic to DI and setting a flag in the class when the whole process
|
|
// runs
|
|
// - setting a new DI environment for core:archive which CoreAdminHome can use to conditionally
|
|
// enable/disable the task
|
|
$_GET['trigger'] = 'archivephp';
|
|
CoreAdminHomeAPI::getInstance()->runScheduledTasks();
|
|
|
|
$this->logSection("");
|
|
}
|
|
|
|
private function archiveSingleSite($idSite, $requestsBefore)
|
|
{
|
|
$timerWebsite = new Timer;
|
|
|
|
$lastTimestampWebsiteProcessedPeriods = $lastTimestampWebsiteProcessedDay = false;
|
|
|
|
if ($this->archiveAndRespectTTL) {
|
|
Option::clearCachedOption($this->lastRunKey($idSite, "periods"));
|
|
$lastTimestampWebsiteProcessedPeriods = $this->getPeriodLastProcessedTimestamp($idSite);
|
|
|
|
Option::clearCachedOption($this->lastRunKey($idSite, "day"));
|
|
$lastTimestampWebsiteProcessedDay = $this->getDayLastProcessedTimestamp($idSite);
|
|
}
|
|
|
|
$this->updateIdSitesInvalidatedOldReports();
|
|
|
|
// For period other than days, we only re-process the reports at most
|
|
// 1) every $processPeriodsMaximumEverySeconds
|
|
$secondsSinceLastExecution = time() - $lastTimestampWebsiteProcessedPeriods;
|
|
|
|
// if timeout is more than 10 min, we account for a 5 min processing time, and allow trigger 1 min earlier
|
|
if ($this->processPeriodsMaximumEverySeconds > 10 * 60) {
|
|
$secondsSinceLastExecution += 5 * 60;
|
|
}
|
|
|
|
$shouldArchivePeriods = $secondsSinceLastExecution > $this->processPeriodsMaximumEverySeconds;
|
|
if (empty($lastTimestampWebsiteProcessedPeriods)) {
|
|
// 2) OR always if script never executed for this website before
|
|
$shouldArchivePeriods = true;
|
|
}
|
|
|
|
// (*) If the website is archived because it is a new day in its timezone
|
|
// We make sure all periods are archived, even if there is 0 visit today
|
|
$dayHasEndedMustReprocess = in_array($idSite, $this->websiteDayHasFinishedSinceLastRun);
|
|
if ($dayHasEndedMustReprocess) {
|
|
$shouldArchivePeriods = true;
|
|
}
|
|
|
|
// (*) If there was some old reports invalidated for this website
|
|
// we make sure all these old reports are triggered at least once
|
|
$websiteInvalidatedShouldReprocess = $this->isOldReportInvalidatedForWebsite($idSite);
|
|
|
|
if ($websiteInvalidatedShouldReprocess) {
|
|
$shouldArchivePeriods = true;
|
|
}
|
|
|
|
$websiteIdIsForced = in_array($idSite, $this->shouldArchiveSpecifiedSites);
|
|
if ($websiteIdIsForced) {
|
|
$shouldArchivePeriods = true;
|
|
}
|
|
|
|
// Test if we should process this website at all
|
|
$elapsedSinceLastArchiving = time() - $lastTimestampWebsiteProcessedDay;
|
|
|
|
// Skip this day archive if last archive was older than TTL
|
|
$existingArchiveIsValid = ($elapsedSinceLastArchiving < $this->todayArchiveTimeToLive);
|
|
|
|
$skipDayArchive = false;
|
|
if($existingArchiveIsValid
|
|
&& !$websiteIdIsForced
|
|
&& !$websiteInvalidatedShouldReprocess
|
|
&& !$dayHasEndedMustReprocess
|
|
&& $this->hasBeenProcessedSinceMidnight($idSite, $lastTimestampWebsiteProcessedDay)) {
|
|
$skipDayArchive = true;
|
|
}
|
|
|
|
if ($skipDayArchive) {
|
|
$this->logger->info("Skipped website id $idSite, already done "
|
|
. $this->formatter->getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true)
|
|
. " ago, " . $timerWebsite->__toString());
|
|
$this->skippedDayArchivesWebsites++;
|
|
$this->skipped++;
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Trigger archiving for days
|
|
*/
|
|
try {
|
|
$shouldProceed = $this->processArchiveDays($idSite, $lastTimestampWebsiteProcessedDay, $shouldArchivePeriods, $timerWebsite);
|
|
} catch (UnexpectedWebsiteFoundException $e) {
|
|
// this website was deleted in the meantime
|
|
$shouldProceed = false;
|
|
$this->logger->info("Skipped website id $idSite, got: UnexpectedWebsiteFoundException, " . $timerWebsite->__toString());
|
|
}
|
|
|
|
if (!$shouldProceed) {
|
|
return false;
|
|
}
|
|
|
|
if (!$shouldArchivePeriods) {
|
|
$this->logger->info("Skipped website id $idSite periods processing, already done "
|
|
. $this->formatter->getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true)
|
|
. " ago, " . $timerWebsite->__toString());
|
|
$this->skippedPeriodsArchivesWebsite++;
|
|
$this->skipped++;
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Trigger archiving for non-day periods
|
|
*/
|
|
$success = $this->processArchiveForPeriods($idSite, $lastTimestampWebsiteProcessedPeriods);
|
|
|
|
// Record successful run of this website's periods archiving
|
|
if ($success) {
|
|
Option::set($this->lastRunKey($idSite, "periods"), time());
|
|
}
|
|
|
|
if (!$success) {
|
|
// cancel marking the site as reprocessed
|
|
if ($websiteInvalidatedShouldReprocess) {
|
|
$store = new SitesToReprocessDistributedList();
|
|
$store->add($idSite);
|
|
}
|
|
}
|
|
|
|
$this->archivedPeriodsArchivesWebsite++;
|
|
|
|
$requestsWebsite = $this->requests - $requestsBefore;
|
|
$this->logger->info("Archived website id = $idSite, "
|
|
. $requestsWebsite . " API requests, "
|
|
. $timerWebsite->__toString()
|
|
. " [" . $this->websites->getNumProcessedWebsites() . "/"
|
|
. $this->websites->getNumSites()
|
|
. " done]");
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $lastTimestampWebsiteProcessedPeriods
|
|
* @return bool
|
|
*/
|
|
private function processArchiveForPeriods($idSite, $lastTimestampWebsiteProcessedPeriods)
|
|
{
|
|
$success = true;
|
|
|
|
foreach (array('week', 'month', 'year') as $period) {
|
|
if (!$this->shouldProcessPeriod($period)) {
|
|
// if any period was skipped, we do not mark the Periods archiving as successful
|
|
$success = false;
|
|
continue;
|
|
}
|
|
|
|
$timer = new Timer();
|
|
|
|
$date = $this->getApiDateParameter($idSite, $period, $lastTimestampWebsiteProcessedPeriods);
|
|
$periodArchiveWasSuccessful = $this->archiveReportsFor($idSite, $period, $date, $archiveSegments = true, $timer);
|
|
$success = $periodArchiveWasSuccessful && $success;
|
|
if(!$success) {
|
|
// if it failed, we abort the current website processing
|
|
return $success;
|
|
}
|
|
}
|
|
|
|
if ($this->shouldProcessPeriod('range')) {
|
|
// period=range
|
|
$customDateRangesToPreProcessForSite = $this->getCustomDateRangeToPreProcess($idSite);
|
|
foreach ($customDateRangesToPreProcessForSite as $dateRange) {
|
|
$timer = new Timer();
|
|
$archiveSegments = false; // do not pre-process segments for period=range #7611
|
|
$periodArchiveWasSuccessful = $this->archiveReportsFor($idSite, 'range', $dateRange, $archiveSegments, $timer);
|
|
$success = $periodArchiveWasSuccessful && $success;
|
|
}
|
|
}
|
|
|
|
return $success;
|
|
}
|
|
|
|
/**
|
|
* Returns base URL to process reports for the $idSite on a given $period
|
|
*
|
|
* @param string $idSite
|
|
* @param string $period
|
|
* @param string $date
|
|
* @param bool|false $segment
|
|
* @return string
|
|
*/
|
|
private function getVisitsRequestUrl($idSite, $period, $date, $segment = false)
|
|
{
|
|
$request = "?module=API&method=API.get&idSite=$idSite&period=$period&date=" . $date . "&format=php";
|
|
if ($segment) {
|
|
$request .= '&segment=' . urlencode($segment);
|
|
;
|
|
}
|
|
return $request;
|
|
}
|
|
|
|
private function initSegmentsToArchive()
|
|
{
|
|
$segments = \Piwik\SettingsPiwik::getKnownSegmentsToArchive();
|
|
|
|
if (empty($segments)) {
|
|
return array();
|
|
}
|
|
|
|
$this->logger->info("- Will pre-process " . count($segments) . " Segments for each website and each period: " . implode(", ", $segments));
|
|
return $segments;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $lastTimestampWebsiteProcessedDay
|
|
* @param $shouldArchivePeriods
|
|
* @param $timerWebsite
|
|
* @return bool
|
|
*/
|
|
protected function processArchiveDays($idSite, $lastTimestampWebsiteProcessedDay, $shouldArchivePeriods, Timer $timerWebsite)
|
|
{
|
|
if (!$this->shouldProcessPeriod("day")) {
|
|
// skip day archiving and proceed to period processing
|
|
return true;
|
|
}
|
|
|
|
$timer = new Timer();
|
|
|
|
// Remove this website from the list of websites to be invalidated
|
|
// since it's now just about to being re-processed, makes sure another running cron archiving process
|
|
// does not archive the same idSite
|
|
$websiteInvalidatedShouldReprocess = $this->isOldReportInvalidatedForWebsite($idSite);
|
|
if ($websiteInvalidatedShouldReprocess) {
|
|
$store = new SitesToReprocessDistributedList();
|
|
$store->remove($idSite);
|
|
}
|
|
|
|
// when some data was purged from this website
|
|
// we make sure we query all previous days/weeks/months
|
|
$processDaysSince = $lastTimestampWebsiteProcessedDay;
|
|
if ($websiteInvalidatedShouldReprocess
|
|
// when --force-all-websites option,
|
|
// also forces to archive last52 days to be safe
|
|
|| $this->shouldArchiveAllSites) {
|
|
$processDaysSince = false;
|
|
}
|
|
|
|
$date = $this->getApiDateParameter($idSite, "day", $processDaysSince);
|
|
$url = $this->getVisitsRequestUrl($idSite, "day", $date);
|
|
|
|
$this->logArchiveWebsite($idSite, "day", $date);
|
|
|
|
$cliMulti = $this->makeCliMulti();
|
|
if ($cliMulti->isCommandAlreadyRunning($this->makeRequestUrl($url))) {
|
|
$this->logger->info("Skipped website id $idSite, such a process is already in progress, " . $timerWebsite->__toString());
|
|
$this->skipped++;
|
|
return false;
|
|
}
|
|
|
|
$content = $this->request($url);
|
|
$daysResponse = Common::safe_unserialize($content);
|
|
|
|
if (empty($content)
|
|
|| !is_array($daysResponse)
|
|
|| count($daysResponse) == 0
|
|
) {
|
|
// cancel marking the site as reprocessed
|
|
if ($websiteInvalidatedShouldReprocess) {
|
|
$store = new SitesToReprocessDistributedList();
|
|
$store->add($idSite);
|
|
}
|
|
|
|
$this->logError("Empty or invalid response '$content' for website id $idSite, " . $timerWebsite->__toString() . ", skipping");
|
|
$this->skippedDayOnApiError++;
|
|
$this->skipped++;
|
|
return false;
|
|
}
|
|
|
|
$visitsToday = $this->getVisitsLastPeriodFromApiResponse($daysResponse);
|
|
$visitsLastDays = $this->getVisitsFromApiResponse($daysResponse);
|
|
|
|
$this->requests++;
|
|
$this->processed++;
|
|
|
|
$shouldArchiveWithoutVisits = PluginsArchiver::doesAnyPluginArchiveWithoutVisits();
|
|
|
|
// If there is no visit today and we don't need to process this website, we can skip remaining archives
|
|
if (
|
|
0 == $visitsToday && !$shouldArchiveWithoutVisits
|
|
&& !$shouldArchivePeriods
|
|
) {
|
|
$this->logger->info("Skipped website id $idSite, no visit today, " . $timerWebsite->__toString());
|
|
$this->skippedDayNoRecentData++;
|
|
$this->skipped++;
|
|
return false;
|
|
}
|
|
|
|
if (0 == $visitsLastDays && !$shouldArchiveWithoutVisits
|
|
&& !$shouldArchivePeriods
|
|
&& $this->shouldArchiveAllSites
|
|
) {
|
|
$humanReadableDate = $this->formatReadableDateRange($date);
|
|
$this->logger->info("Skipped website id $idSite, no visits in the $humanReadableDate days, " . $timerWebsite->__toString());
|
|
$this->skippedPeriodsNoDataInPeriod++;
|
|
$this->skipped++;
|
|
return false;
|
|
}
|
|
|
|
$this->visitsToday += $visitsToday;
|
|
$this->websitesWithVisitsSinceLastRun++;
|
|
|
|
$dayArchiveWasSuccessful = $this->archiveReportsFor($idSite, "day", $this->getApiDateParameter($idSite, "day", $processDaysSince), $archiveSegments = true, $timer, $visitsToday, $visitsLastDays);
|
|
|
|
if($dayArchiveWasSuccessful) {
|
|
Option::set($this->lastRunKey($idSite, "day"), time());
|
|
}
|
|
return $dayArchiveWasSuccessful;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @return array
|
|
*/
|
|
private function getSegmentsForSite($idSite)
|
|
{
|
|
$segmentsAllSites = $this->segments;
|
|
$segmentsThisSite = SettingsPiwik::getKnownSegmentsToArchiveForSite($idSite);
|
|
$segments = array_unique(array_merge($segmentsAllSites, $segmentsThisSite));
|
|
return $segments;
|
|
}
|
|
|
|
private function formatReadableDateRange($date)
|
|
{
|
|
if (0 === strpos($date, 'last')) {
|
|
$readable = 'last ' . str_replace('last', '', $date);
|
|
} elseif (0 === strpos($date, 'previous')) {
|
|
$readable = 'previous ' . str_replace('previous', '', $date);
|
|
} else {
|
|
$readable = 'last ' . $date;
|
|
}
|
|
|
|
return $readable;
|
|
}
|
|
|
|
/**
|
|
* Will trigger API requests for the specified Website $idSite,
|
|
* for the specified $period, for all segments that are pre-processed for this website.
|
|
* Requests are triggered using cURL multi handle
|
|
*
|
|
* @param $idSite int
|
|
* @param $period string
|
|
* @param $date string
|
|
* @param $archiveSegments bool Whether to pre-process all custom segments
|
|
* @param Timer $periodTimer
|
|
* @param $visitsToday int Visits for the "day" period of today
|
|
* @param $visitsLastDays int Visits for the last N days periods
|
|
* @return bool True on success, false if some request failed
|
|
*/
|
|
private function archiveReportsFor($idSite, $period, $date, $archiveSegments, Timer $periodTimer, $visitsToday = 0, $visitsLastDays = 0)
|
|
{
|
|
$url = $this->getVisitsRequestUrl($idSite, $period, $date, $segment = false);
|
|
$url = $this->makeRequestUrl($url);
|
|
|
|
$visitsInLastPeriod = $visitsToday;
|
|
$visitsInLastPeriods = $visitsLastDays;
|
|
$success = true;
|
|
|
|
$urls = array();
|
|
|
|
$cliMulti = $this->makeCliMulti();
|
|
|
|
$noSegmentUrl = $url;
|
|
|
|
// already processed above for "day"
|
|
if ($period != "day") {
|
|
|
|
if ($this->isAlreadyArchivingUrl($url, $idSite, $period, $date)) {
|
|
$success = false;
|
|
return $success;
|
|
}
|
|
|
|
$self = $this;
|
|
$request = new Request($url);
|
|
$request->before(function () use ($self, $url, $idSite, $period, $date) {
|
|
if ($self->isAlreadyArchivingUrl($url, $idSite, $period, $date)) {
|
|
return Request::ABORT;
|
|
}
|
|
});
|
|
$urls[] = $request;
|
|
$this->logArchiveWebsite($idSite, $period, $date);
|
|
}
|
|
|
|
$segmentRequestsCount = 0;
|
|
if ($archiveSegments) {
|
|
$urlsWithSegment = $this->getUrlsWithSegment($idSite, $period, $date);
|
|
$urls = array_merge($urls, $urlsWithSegment);
|
|
$segmentRequestsCount = count($urlsWithSegment);
|
|
|
|
// in case several segment URLs for period=range had the date= rewritten to the same value, we only call API once
|
|
$urls = array_unique($urls);
|
|
}
|
|
|
|
$this->requests += count($urls);
|
|
|
|
$response = $cliMulti->request($urls);
|
|
|
|
foreach ($urls as $index => $url) {
|
|
$content = array_key_exists($index, $response) ? $response[$index] : null;
|
|
$success = $success && $this->checkResponse($content, $url);
|
|
|
|
if ($noSegmentUrl == $url && $success) {
|
|
$stats = Common::safe_unserialize($content);
|
|
|
|
if (!is_array($stats)) {
|
|
$this->logError("Error unserializing the following response from $url: " . $content);
|
|
$success = false;
|
|
}
|
|
|
|
if ($period == 'range') {
|
|
// range returns one dataset (the sum of data between the two dates),
|
|
// whereas other periods return lastN which is N datasets in an array. Here we make our period=range dataset look like others:
|
|
$stats = array($stats);
|
|
}
|
|
|
|
$visitsInLastPeriods = $this->getVisitsFromApiResponse($stats);
|
|
$visitsInLastPeriod = $this->getVisitsLastPeriodFromApiResponse($stats);
|
|
}
|
|
}
|
|
|
|
$this->logArchivedWebsite($idSite, $period, $date, $segmentRequestsCount, $visitsInLastPeriods, $visitsInLastPeriod, $periodTimer);
|
|
|
|
return $success;
|
|
}
|
|
|
|
/**
|
|
* Logs a section in the output
|
|
*
|
|
* @param string $title
|
|
*/
|
|
private function logSection($title = "")
|
|
{
|
|
$this->logger->info("---------------------------");
|
|
if (!empty($title)) {
|
|
$this->logger->info($title);
|
|
}
|
|
}
|
|
|
|
public function logError($m)
|
|
{
|
|
if (!defined('PIWIK_ARCHIVE_NO_TRUNCATE')) {
|
|
$m = substr($m, 0, self::TRUNCATE_ERROR_MESSAGE_SUMMARY);
|
|
}
|
|
$m = str_replace(array("\n", "\t"), " ", $m);
|
|
$this->errors[] = $m;
|
|
$this->logger->error($m);
|
|
}
|
|
|
|
private function logNetworkError($url, $response)
|
|
{
|
|
$message = "Got invalid response from API request: $url. ";
|
|
if (empty($response)) {
|
|
$message .= "The response was empty. This usually means a server error. A solution to this error is generally to increase the value of 'memory_limit' in your php.ini file. ";
|
|
|
|
if($this->makeCliMulti()->supportsAsync()) {
|
|
$message .= " For more information and the error message please check in your PHP CLI error log file. As this core:archive command triggers PHP processes over the CLI, you can find where PHP CLI logs are stored by running this command: php -i | grep error_log";
|
|
} else {
|
|
$message .= " For more information and the error message please check your web server's error Log file. As this core:archive command triggers PHP processes over HTTP, you can find the error message in your Matomo's web server error logs. ";
|
|
}
|
|
} else {
|
|
$message .= "Response was '$response'";
|
|
}
|
|
|
|
$this->logError($message);
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Issues a request to $url eg. "?module=API&method=API.getDefaultMetricTranslations&format=original&serialize=1"
|
|
*
|
|
* @param string $url
|
|
* @return string
|
|
*/
|
|
private function request($url)
|
|
{
|
|
$url = $this->makeRequestUrl($url);
|
|
|
|
try {
|
|
$cliMulti = $this->makeCliMulti();
|
|
$responses = $cliMulti->request(array($url));
|
|
|
|
$response = !empty($responses) ? array_shift($responses) : null;
|
|
} catch (Exception $e) {
|
|
return $this->logNetworkError($url, $e->getMessage());
|
|
}
|
|
if ($this->checkResponse($response, $url)) {
|
|
return $response;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
private function checkResponse($response, $url)
|
|
{
|
|
if (empty($response)
|
|
|| stripos($response, 'error') !== false
|
|
) {
|
|
return $this->logNetworkError($url, $response);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Initializes the various parameters to the script, based on input parameters.
|
|
*
|
|
*/
|
|
private function initStateFromParameters()
|
|
{
|
|
$this->todayArchiveTimeToLive = Rules::getTodayArchiveTimeToLive();
|
|
$this->processPeriodsMaximumEverySeconds = $this->getDelayBetweenPeriodsArchives();
|
|
$this->lastSuccessRunTimestamp = $this->getLastSuccessRunTimestamp();
|
|
$this->shouldArchiveOnlySitesWithTrafficSince = $this->isShouldArchiveAllSitesWithTrafficSince();
|
|
$this->shouldArchiveOnlySpecificPeriods = $this->getPeriodsToProcess();
|
|
|
|
if ($this->shouldArchiveOnlySitesWithTrafficSince !== false) {
|
|
// force-all-periods is set here
|
|
$this->archiveAndRespectTTL = false;
|
|
}
|
|
}
|
|
|
|
private function getSecondsSinceLastArchive()
|
|
{
|
|
$wasNotCustomTimeRequested = $this->shouldArchiveOnlySitesWithTrafficSince === false;
|
|
|
|
if ($wasNotCustomTimeRequested && !empty($this->lastSuccessRunTimestamp)) {
|
|
// there was a previous successful run
|
|
|
|
return time() - $this->lastSuccessRunTimestamp;
|
|
|
|
} elseif (is_numeric($this->shouldArchiveOnlySitesWithTrafficSince)) {
|
|
// $shouldArchiveAllPeriodsSince was specified
|
|
$secondsSinceStart = time() - $this->archivingStartingTime;
|
|
return $this->shouldArchiveOnlySitesWithTrafficSince + $secondsSinceStart;
|
|
}
|
|
|
|
// force-all-periods without value
|
|
return self::ARCHIVE_SITES_WITH_TRAFFIC_SINCE;
|
|
}
|
|
|
|
public function filterWebsiteIds(&$websiteIds)
|
|
{
|
|
// Keep only the websites that do exist
|
|
$websiteIds = array_intersect($websiteIds, $this->allWebsites);
|
|
|
|
/**
|
|
* Triggered by the **core:archive** console command so plugins can modify the list of
|
|
* websites that the archiving process will be launched for.
|
|
*
|
|
* Plugins can use this hook to add websites to archive, remove websites to archive, or change
|
|
* the order in which websites will be archived.
|
|
*
|
|
* @param array $websiteIds The list of website IDs to launch the archiving process for.
|
|
*/
|
|
Piwik::postEvent('CronArchive.filterWebsiteIds', array(&$websiteIds));
|
|
}
|
|
|
|
/**
|
|
* @internal
|
|
* @param $api
|
|
*/
|
|
public function setApiToInvalidateArchivedReport($api)
|
|
{
|
|
$this->apiToInvalidateArchivedReport = $api;
|
|
}
|
|
|
|
private function getApiToInvalidateArchivedReport()
|
|
{
|
|
if ($this->apiToInvalidateArchivedReport) {
|
|
return $this->apiToInvalidateArchivedReport;
|
|
}
|
|
|
|
return CoreAdminHomeAPI::getInstance();
|
|
}
|
|
|
|
public function invalidateArchivedReportsForSitesThatNeedToBeArchivedAgain()
|
|
{
|
|
$sitesPerDays = $this->invalidator->getRememberedArchivedReportsThatShouldBeInvalidated();
|
|
|
|
foreach ($sitesPerDays as $date => $siteIds) {
|
|
//Concurrent transaction logic will end up with duplicates set. Adding array_unique to the siteIds.
|
|
$listSiteIds = implode(',', array_unique($siteIds ));
|
|
|
|
try {
|
|
$this->logger->info('- Will invalidate archived reports for ' . $date . ' for following websites ids: ' . $listSiteIds);
|
|
$this->getApiToInvalidateArchivedReport()->invalidateArchivedReports($siteIds, $date);
|
|
} catch (Exception $e) {
|
|
$this->logger->info('Failed to invalidate archived reports: ' . $e->getMessage());
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the list of sites to loop over and archive.
|
|
* @return array
|
|
*/
|
|
public function initWebsiteIds()
|
|
{
|
|
if (count($this->shouldArchiveSpecifiedSites) > 0) {
|
|
$this->logger->info("- Will process " . count($this->shouldArchiveSpecifiedSites) . " websites (--force-idsites)");
|
|
|
|
return $this->shouldArchiveSpecifiedSites;
|
|
}
|
|
|
|
$this->findWebsiteIdsInTimezoneWithNewDay($this->allWebsites);
|
|
$this->findInvalidatedSitesToReprocess();
|
|
|
|
if ($this->shouldArchiveAllSites) {
|
|
$this->logger->info("- Will process all " . count($this->allWebsites) . " websites");
|
|
}
|
|
|
|
return $this->allWebsites;
|
|
}
|
|
|
|
private function updateIdSitesInvalidatedOldReports()
|
|
{
|
|
$store = new SitesToReprocessDistributedList();
|
|
$this->idSitesInvalidatedOldReports = $store->getAll();
|
|
}
|
|
|
|
/**
|
|
* Return All websites that had reports in the past which were invalidated recently
|
|
* (see API CoreAdminHome.invalidateArchivedReports)
|
|
* eg. when using Python log import script
|
|
*
|
|
* @return array
|
|
*/
|
|
private function findInvalidatedSitesToReprocess()
|
|
{
|
|
$this->updateIdSitesInvalidatedOldReports();
|
|
|
|
if (count($this->idSitesInvalidatedOldReports) > 0) {
|
|
$ids = ", IDs: " . implode(", ", $this->idSitesInvalidatedOldReports);
|
|
$this->logger->info("- Will process " . count($this->idSitesInvalidatedOldReports)
|
|
. " other websites because some old data reports have been invalidated (eg. using the Log Import script or the InvalidateReports plugin) "
|
|
. $ids);
|
|
}
|
|
|
|
return $this->idSitesInvalidatedOldReports;
|
|
}
|
|
|
|
/**
|
|
* Detects whether a site had visits since midnight in the websites timezone
|
|
*
|
|
* @param $idSite
|
|
* @return bool
|
|
*/
|
|
private function hadWebsiteTrafficSinceMidnightInTimezone($idSite)
|
|
{
|
|
$timezone = Site::getTimezoneFor($idSite);
|
|
|
|
$nowInTimezone = Date::factory('now', $timezone);
|
|
$midnightInTimezone = $nowInTimezone->setTime('00:00:00');
|
|
|
|
$secondsSinceMidnight = $nowInTimezone->getTimestamp() - $midnightInTimezone->getTimestamp();
|
|
|
|
$secondsSinceLastArchive = $this->getSecondsSinceLastArchive();
|
|
if ($secondsSinceLastArchive < $secondsSinceMidnight) {
|
|
$secondsBackToLookForVisits = $secondsSinceLastArchive;
|
|
$sinceInfo = "(since the last successful archiving)";
|
|
} else {
|
|
$secondsBackToLookForVisits = $secondsSinceMidnight;
|
|
$sinceInfo = "(since midnight)";
|
|
}
|
|
|
|
$from = Date::now()->subSeconds($secondsBackToLookForVisits)->getDatetime();
|
|
$to = Date::now()->addHour(1)->getDatetime();
|
|
|
|
$dao = new RawLogDao();
|
|
$hasVisits = $dao->hasSiteVisitsBetweenTimeframe($from, $to, $idSite);
|
|
|
|
if ($hasVisits) {
|
|
$this->logger->info("- tracking data found for website id $idSite since $from UTC $sinceInfo");
|
|
} else {
|
|
$this->logger->info("- no new tracking data for website id $idSite since $from UTC $sinceInfo");
|
|
}
|
|
|
|
return $hasVisits;
|
|
}
|
|
|
|
/**
|
|
* Returns the list of timezones where the specified timestamp in that timezone
|
|
* is on a different day than today in that timezone.
|
|
*
|
|
* @return array
|
|
*/
|
|
private function getTimezonesHavingNewDaySinceLastRun()
|
|
{
|
|
$timestamp = $this->lastSuccessRunTimestamp;
|
|
$uniqueTimezones = APISitesManager::getInstance()->getUniqueSiteTimezones();
|
|
$timezoneToProcess = array();
|
|
foreach ($uniqueTimezones as &$timezone) {
|
|
$processedDateInTz = Date::factory((int)$timestamp, $timezone);
|
|
$currentDateInTz = Date::factory('now', $timezone);
|
|
|
|
if ($processedDateInTz->toString() != $currentDateInTz->toString()) {
|
|
$timezoneToProcess[] = $timezone;
|
|
}
|
|
}
|
|
return $timezoneToProcess;
|
|
}
|
|
|
|
private function hasBeenProcessedSinceMidnight($idSite, $lastTimestampWebsiteProcessedDay)
|
|
{
|
|
if (false === $lastTimestampWebsiteProcessedDay) {
|
|
return true;
|
|
}
|
|
|
|
$timezone = Site::getTimezoneFor($idSite);
|
|
|
|
$dateInTimezone = Date::factory('now', $timezone);
|
|
$midnightInTimezone = $dateInTimezone->setTime('00:00:00');
|
|
|
|
$lastProcessedDateInTimezone = Date::factory((int) $lastTimestampWebsiteProcessedDay, $timezone);
|
|
|
|
return $lastProcessedDateInTimezone->getTimestamp() >= $midnightInTimezone->getTimestamp();
|
|
}
|
|
|
|
/**
|
|
* Returns the list of websites in which timezones today is a new day
|
|
* (compared to the last time archiving was executed)
|
|
*
|
|
* @param $websiteIds
|
|
* @return array Website IDs
|
|
*/
|
|
private function findWebsiteIdsInTimezoneWithNewDay($websiteIds)
|
|
{
|
|
$timezones = $this->getTimezonesHavingNewDaySinceLastRun();
|
|
$websiteDayHasFinishedSinceLastRun = APISitesManager::getInstance()->getSitesIdFromTimezones($timezones);
|
|
$websiteDayHasFinishedSinceLastRun = array_intersect($websiteDayHasFinishedSinceLastRun, $websiteIds);
|
|
$this->websiteDayHasFinishedSinceLastRun = $websiteDayHasFinishedSinceLastRun;
|
|
|
|
if (count($websiteDayHasFinishedSinceLastRun) > 0) {
|
|
$ids = !empty($websiteDayHasFinishedSinceLastRun) ? ", IDs: " . implode(", ", $websiteDayHasFinishedSinceLastRun) : "";
|
|
$this->logger->info("- Will process " . count($websiteDayHasFinishedSinceLastRun)
|
|
. " other websites because the last time they were archived was on a different day (in the website's timezone) "
|
|
. $ids);
|
|
}
|
|
|
|
return $websiteDayHasFinishedSinceLastRun;
|
|
}
|
|
|
|
private function logInitInfo()
|
|
{
|
|
$this->logSection("INIT");
|
|
$this->logger->info("Running Matomo " . Version::VERSION . " as Super User");
|
|
}
|
|
|
|
private function logArchiveTimeoutInfo()
|
|
{
|
|
$this->logSection("NOTES");
|
|
|
|
// Recommend to disable browser archiving when using this script
|
|
if (Rules::isBrowserTriggerEnabled()) {
|
|
$this->logger->info("- If you execute this script at least once per hour (or more often) in a crontab, you may disable 'Browser trigger archiving' in Matomo UI > Settings > General Settings.");
|
|
$this->logger->info(" See the doc at: https://matomo.org/docs/setup-auto-archiving/");
|
|
}
|
|
$this->logger->info("- Reports for today will be processed at most every " . $this->todayArchiveTimeToLive
|
|
. " seconds. You can change this value in Matomo UI > Settings > General Settings.");
|
|
|
|
$this->logger->info("- Reports for the current week/month/year will be requested at most every "
|
|
. $this->processPeriodsMaximumEverySeconds . " seconds.");
|
|
|
|
foreach (array('week', 'month', 'year', 'range') as $period) {
|
|
$ttl = Rules::getPeriodArchiveTimeToLiveDefault($period);
|
|
|
|
if (!empty($ttl) && $ttl !== $this->todayArchiveTimeToLive) {
|
|
$this->logger->info("- Reports for the current $period will be processed at most every " . $ttl
|
|
. " seconds. You can change this value in config/config.ini.php by editing 'time_before_" . $period . "_archive_considered_outdated' in the '[General]' section.");
|
|
}
|
|
}
|
|
|
|
// Try and not request older data we know is already archived
|
|
if ($this->lastSuccessRunTimestamp !== false) {
|
|
$dateLast = time() - $this->lastSuccessRunTimestamp;
|
|
$this->logger->info("- Archiving was last executed without error "
|
|
. $this->formatter->getPrettyTimeFromSeconds($dateLast, true) . " ago");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the delay in seconds, that should be enforced, between calling archiving for Periods Archives.
|
|
* It can be set by --force-timeout-for-periods=X
|
|
*
|
|
* @return int
|
|
*/
|
|
private function getDelayBetweenPeriodsArchives()
|
|
{
|
|
if (empty($this->forceTimeoutPeriod)) {
|
|
return self::SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES;
|
|
}
|
|
|
|
// Ensure the cache for periods is at least as high as cache for today
|
|
if ($this->forceTimeoutPeriod > $this->todayArchiveTimeToLive) {
|
|
return $this->forceTimeoutPeriod;
|
|
}
|
|
|
|
$this->logger->info("WARNING: Automatically increasing --force-timeout-for-periods from {$this->forceTimeoutPeriod} to "
|
|
. $this->todayArchiveTimeToLive
|
|
. " to match the cache timeout for Today's report specified in Matomo UI > Settings > General Settings");
|
|
|
|
return $this->todayArchiveTimeToLive;
|
|
}
|
|
|
|
private function isShouldArchiveAllSitesWithTrafficSince()
|
|
{
|
|
if (empty($this->shouldArchiveAllPeriodsSince)) {
|
|
return false;
|
|
}
|
|
|
|
if (is_numeric($this->shouldArchiveAllPeriodsSince)
|
|
&& $this->shouldArchiveAllPeriodsSince > 1
|
|
) {
|
|
return (int)$this->shouldArchiveAllPeriodsSince;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
private function getVisitsLastPeriodFromApiResponse($stats)
|
|
{
|
|
if (empty($stats)) {
|
|
return 0;
|
|
}
|
|
|
|
$today = end($stats);
|
|
|
|
if (empty($today['nb_visits'])) {
|
|
return 0;
|
|
}
|
|
|
|
return $today['nb_visits'];
|
|
}
|
|
|
|
private function getVisitsFromApiResponse($stats)
|
|
{
|
|
if (empty($stats)) {
|
|
return 0;
|
|
}
|
|
|
|
$visits = 0;
|
|
foreach ($stats as $metrics) {
|
|
if (empty($metrics['nb_visits'])) {
|
|
continue;
|
|
}
|
|
$visits += $metrics['nb_visits'];
|
|
}
|
|
|
|
return $visits;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $period
|
|
* @param $lastTimestampWebsiteProcessed
|
|
* @return float|int|true
|
|
*/
|
|
private function getApiDateParameter($idSite, $period, $lastTimestampWebsiteProcessed = false)
|
|
{
|
|
$dateRangeForced = $this->getDateRangeToProcess();
|
|
|
|
if (!empty($dateRangeForced)) {
|
|
return $dateRangeForced;
|
|
}
|
|
|
|
return $this->getDateLastN($idSite, $period, $lastTimestampWebsiteProcessed);
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $period
|
|
* @param $date
|
|
* @param $segmentsCount
|
|
* @param $visitsInLastPeriods
|
|
* @param $visitsToday
|
|
* @param $timer
|
|
*/
|
|
private function logArchivedWebsite($idSite, $period, $date, $segmentsCount, $visitsInLastPeriods, $visitsToday, Timer $timer)
|
|
{
|
|
if (strpos($date, 'last') === 0 || strpos($date, 'previous') === 0) {
|
|
$humanReadable = $this->formatReadableDateRange($date);
|
|
$visitsInLastPeriods = (int)$visitsInLastPeriods . " visits in $humanReadable " . $period . "s, ";
|
|
$thisPeriod = $period == "day" ? "today" : "this " . $period;
|
|
$visitsInLastPeriod = (int)$visitsToday . " visits " . $thisPeriod . ", ";
|
|
} else {
|
|
$visitsInLastPeriods = (int)$visitsInLastPeriods . " visits in " . $period . "s included in: $date, ";
|
|
$visitsInLastPeriod = '';
|
|
}
|
|
|
|
$this->logger->info("Archived website id = $idSite, period = $period, $segmentsCount segments, "
|
|
. $visitsInLastPeriods
|
|
. $visitsInLastPeriod
|
|
. $timer->__toString());
|
|
}
|
|
|
|
private function getDateRangeToProcess()
|
|
{
|
|
if (empty($this->restrictToDateRange)) {
|
|
return false;
|
|
}
|
|
|
|
if (strpos($this->restrictToDateRange, ',') === false) {
|
|
throw new Exception("--force-date-range expects a date range ie. YYYY-MM-DD,YYYY-MM-DD");
|
|
}
|
|
|
|
return $this->restrictToDateRange;
|
|
}
|
|
|
|
/**
|
|
* @return array
|
|
*/
|
|
private function getPeriodsToProcess()
|
|
{
|
|
$this->restrictToPeriods = array_intersect($this->restrictToPeriods, $this->getDefaultPeriodsToProcess());
|
|
$this->restrictToPeriods = array_intersect($this->restrictToPeriods, PeriodFactory::getPeriodsEnabledForAPI());
|
|
|
|
return $this->restrictToPeriods;
|
|
}
|
|
|
|
/**
|
|
* @return array
|
|
*/
|
|
private function getDefaultPeriodsToProcess()
|
|
{
|
|
return array('day', 'week', 'month', 'year', 'range');
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @return bool
|
|
*/
|
|
private function isOldReportInvalidatedForWebsite($idSite)
|
|
{
|
|
return in_array($idSite, $this->idSitesInvalidatedOldReports);
|
|
}
|
|
|
|
private function isWebsiteUsingTheTracker($idSite)
|
|
{
|
|
if (!isset($this->idSitesNotUsingTracker)) {
|
|
// we want to trigger event only once
|
|
$this->idSitesNotUsingTracker = array();
|
|
|
|
/**
|
|
* This event is triggered when detecting whether there are sites that do not use the tracker.
|
|
*
|
|
* By default we only archive a site when there was actually any visit since the last archiving.
|
|
* However, some plugins do import data from another source instead of using the tracker and therefore
|
|
* will never have any visits for this site. To make sure we still archive data for such a site when
|
|
* archiving for this site is requested, you can listen to this event and add the idSite to the list of
|
|
* sites that do not use the tracker.
|
|
*
|
|
* @param bool $idSitesNotUsingTracker The list of idSites that rather import data instead of using the tracker
|
|
*/
|
|
Piwik::postEvent('CronArchive.getIdSitesNotUsingTracker', array(&$this->idSitesNotUsingTracker));
|
|
|
|
if (!empty($this->idSitesNotUsingTracker)) {
|
|
$this->logger->info("- The following websites do not use the tracker: " . implode(',', $this->idSitesNotUsingTracker));
|
|
}
|
|
}
|
|
|
|
$isUsingTracker = !in_array($idSite, $this->idSitesNotUsingTracker);
|
|
|
|
return $isUsingTracker;
|
|
}
|
|
|
|
private function shouldProcessPeriod($period)
|
|
{
|
|
if (empty($this->shouldArchiveOnlySpecificPeriods)) {
|
|
return true;
|
|
}
|
|
|
|
return in_array($period, $this->shouldArchiveOnlySpecificPeriods);
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $period
|
|
* @param $lastTimestampWebsiteProcessed
|
|
* @return string
|
|
*/
|
|
private function getDateLastN($idSite, $period, $lastTimestampWebsiteProcessed)
|
|
{
|
|
$dateLastMax = self::DEFAULT_DATE_LAST;
|
|
if ($period == 'year') {
|
|
$dateLastMax = self::DEFAULT_DATE_LAST_YEARS;
|
|
} elseif ($period == 'week') {
|
|
$dateLastMax = self::DEFAULT_DATE_LAST_WEEKS;
|
|
}
|
|
if (empty($lastTimestampWebsiteProcessed)) {
|
|
$creationDateFor = \Piwik\Site::getCreationDateFor($idSite);
|
|
$lastTimestampWebsiteProcessed = strtotime($creationDateFor);
|
|
}
|
|
|
|
// Enforcing last2 at minimum to work around timing issues and ensure we make most archives available
|
|
$dateLast = floor((time() - $lastTimestampWebsiteProcessed) / 86400) + 2;
|
|
if ($dateLast > $dateLastMax) {
|
|
$dateLast = $dateLastMax;
|
|
}
|
|
|
|
if (!empty($this->dateLastForced)) {
|
|
$dateLast = $this->dateLastForced;
|
|
}
|
|
|
|
return "last" . $dateLast;
|
|
}
|
|
|
|
/**
|
|
* @return int
|
|
*/
|
|
private function getConcurrentRequestsPerWebsite()
|
|
{
|
|
if (false !== $this->concurrentRequestsPerWebsite) {
|
|
return $this->concurrentRequestsPerWebsite;
|
|
}
|
|
|
|
return self::MAX_CONCURRENT_API_REQUESTS;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @return false|string
|
|
*/
|
|
private function getPeriodLastProcessedTimestamp($idSite)
|
|
{
|
|
$timestamp = Option::get($this->lastRunKey($idSite, "periods"));
|
|
return $this->sanitiseTimestamp($timestamp);
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @return false|string
|
|
*/
|
|
private function getDayLastProcessedTimestamp($idSite)
|
|
{
|
|
$timestamp = Option::get($this->lastRunKey($idSite, "day"));
|
|
return $this->sanitiseTimestamp($timestamp);
|
|
}
|
|
|
|
/**
|
|
* @return false|string
|
|
*/
|
|
private function getLastSuccessRunTimestamp()
|
|
{
|
|
$timestamp = Option::get(self::OPTION_ARCHIVING_FINISHED_TS);
|
|
return $this->sanitiseTimestamp($timestamp);
|
|
}
|
|
|
|
private function sanitiseTimestamp($timestamp)
|
|
{
|
|
$now = time();
|
|
return ($timestamp < $now) ? $timestamp : $now;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @return array of date strings
|
|
*/
|
|
private function getCustomDateRangeToPreProcess($idSite)
|
|
{
|
|
static $cache = null;
|
|
if (is_null($cache)) {
|
|
$cache = $this->loadCustomDateRangeToPreProcess();
|
|
}
|
|
if (empty($cache[$idSite])) {
|
|
return array();
|
|
}
|
|
$dates = array_unique($cache[$idSite]);
|
|
return $dates;
|
|
}
|
|
|
|
/**
|
|
* @return array
|
|
*/
|
|
private function loadCustomDateRangeToPreProcess()
|
|
{
|
|
$customDateRangesToProcessForSites = array();
|
|
|
|
// For all users who have selected this website to load by default,
|
|
// we load the default period/date that will be loaded for this user
|
|
// and make sure it's pre-archived
|
|
$allUsersPreferences = APIUsersManager::getInstance()->getAllUsersPreferences(array(
|
|
APIUsersManager::PREFERENCE_DEFAULT_REPORT_DATE,
|
|
APIUsersManager::PREFERENCE_DEFAULT_REPORT
|
|
));
|
|
|
|
foreach ($allUsersPreferences as $userLogin => $userPreferences) {
|
|
if (!isset($userPreferences[APIUsersManager::PREFERENCE_DEFAULT_REPORT_DATE])) {
|
|
continue;
|
|
}
|
|
|
|
$defaultDate = $userPreferences[APIUsersManager::PREFERENCE_DEFAULT_REPORT_DATE];
|
|
$preference = new UserPreferences();
|
|
$period = $preference->getDefaultPeriod($defaultDate);
|
|
if ($period != 'range') {
|
|
continue;
|
|
}
|
|
|
|
if (isset($userPreferences[APIUsersManager::PREFERENCE_DEFAULT_REPORT])
|
|
&& is_numeric($userPreferences[APIUsersManager::PREFERENCE_DEFAULT_REPORT])) {
|
|
// If user selected one particular website ID
|
|
$idSites = array($userPreferences[APIUsersManager::PREFERENCE_DEFAULT_REPORT]);
|
|
} else {
|
|
// If user selected "All websites" or some other random value, we pre-process all websites that they have access to
|
|
$idSites = APISitesManager::getInstance()->getSitesIdWithAtLeastViewAccess($userLogin);
|
|
}
|
|
|
|
foreach ($idSites as $idSite) {
|
|
$customDateRangesToProcessForSites[$idSite][] = $defaultDate;
|
|
}
|
|
}
|
|
|
|
return $customDateRangesToProcessForSites;
|
|
}
|
|
|
|
/**
|
|
* @param $url
|
|
* @return string
|
|
*/
|
|
private function makeRequestUrl($url)
|
|
{
|
|
$url = $url . self::APPEND_TO_API_REQUEST;
|
|
|
|
if ($this->shouldStartProfiler) {
|
|
$url .= "&xhprof=2";
|
|
}
|
|
|
|
if ($this->testmode) {
|
|
$url .= "&testmode=1";
|
|
}
|
|
|
|
/**
|
|
* @ignore
|
|
*/
|
|
Piwik::postEvent('CronArchive.alterArchivingRequestUrl', [&$url]);
|
|
|
|
return $url;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $period
|
|
* @param $date
|
|
* @return Request[]
|
|
*/
|
|
private function getUrlsWithSegment($idSite, $period, $date)
|
|
{
|
|
$urlsWithSegment = array();
|
|
$segmentsForSite = $this->getSegmentsForSite($idSite);
|
|
|
|
$segments = array();
|
|
foreach ($segmentsForSite as $segment) {
|
|
if ($this->shouldSkipSegmentArchiving($segment)) {
|
|
$this->logger->info("- skipping segment archiving for '{segment}'.", array('segment' => $segment));
|
|
|
|
continue;
|
|
}
|
|
|
|
$segments[] = $segment;
|
|
}
|
|
|
|
$segmentCount = count($segments);
|
|
$processedSegmentCount = 0;
|
|
|
|
foreach ($segments as $segment) {
|
|
$dateParamForSegment = $this->segmentArchivingRequestUrlProvider->getUrlParameterDateString($idSite, $period, $date, $segment);
|
|
|
|
$urlWithSegment = $this->getVisitsRequestUrl($idSite, $period, $dateParamForSegment, $segment);
|
|
$urlWithSegment = $this->makeRequestUrl($urlWithSegment);
|
|
|
|
if ($this->isAlreadyArchivingSegment($urlWithSegment, $idSite, $period, $segment)) {
|
|
continue;
|
|
}
|
|
|
|
$request = new Request($urlWithSegment);
|
|
$logger = $this->logger;
|
|
$self = $this;
|
|
$request->before(function () use ($logger, $segment, $segmentCount, &$processedSegmentCount, $idSite, $period, $urlWithSegment, $self) {
|
|
|
|
if ($self->isAlreadyArchivingSegment($urlWithSegment, $idSite, $period, $segment)) {
|
|
return Request::ABORT;
|
|
}
|
|
|
|
$processedSegmentCount++;
|
|
$logger->info(sprintf(
|
|
'- pre-processing segment %d/%d %s',
|
|
$processedSegmentCount,
|
|
$segmentCount,
|
|
$segment
|
|
));
|
|
});
|
|
|
|
$urlsWithSegment[] = $request;
|
|
}
|
|
|
|
return $urlsWithSegment;
|
|
}
|
|
|
|
private function isAlreadyArchivingAnyLowerOrThisPeriod($idSite, $period, $segment = false)
|
|
{
|
|
$periodOrder = array('day', 'week', 'month', 'year');
|
|
$cliMulti = $this->makeCliMulti();
|
|
|
|
$index = array_search($period, $periodOrder);
|
|
if ($index !== false) {
|
|
// we only need to check for week, month, year if any earlier period is already running
|
|
// so when period = month, then we check for day and week
|
|
|
|
for ($i = 0; $i <= $index; $i++) {
|
|
$periodToCheck = $periodOrder[$i];
|
|
|
|
// the date will be ignored in isCommandAlreadyRunning() because it could be any date
|
|
$urlCheck = $this->getVisitsRequestUrl($idSite, $periodToCheck, 'last2', $segment);
|
|
$urlCheck = $this->makeRequestUrl($urlCheck);
|
|
|
|
if ($cliMulti->isCommandAlreadyRunning($urlCheck)) {
|
|
return $periodToCheck;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
private function createSitesToArchiveQueue($websitesIds)
|
|
{
|
|
// use synchronous, single process queue if --force-idsites is used or sharing site IDs isn't supported
|
|
if (!SharedSiteIds::isSupported() || !empty($this->shouldArchiveSpecifiedSites)) {
|
|
return new FixedSiteIds($websitesIds);
|
|
}
|
|
|
|
// use separate shared queue if --force-all-websites is used
|
|
if (!empty($this->shouldArchiveAllSites)) {
|
|
return new SharedSiteIds($websitesIds, SharedSiteIds::OPTION_ALL_WEBSITES);
|
|
}
|
|
|
|
return new SharedSiteIds($websitesIds);
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $period
|
|
* @param $date
|
|
*/
|
|
private function logArchiveWebsite($idSite, $period, $date)
|
|
{
|
|
$this->logger->info(sprintf(
|
|
"Will pre-process for website id = %s, period = %s, date = %s",
|
|
$idSite,
|
|
$period,
|
|
$date
|
|
));
|
|
$this->logger->info('- pre-processing all visits');
|
|
}
|
|
|
|
public function isAlreadyArchivingUrl($url, $idSite, $period, $date)
|
|
{
|
|
$periodInProgress = $this->isAlreadyArchivingAnyLowerOrThisPeriod($idSite, $period);
|
|
if ($periodInProgress) {
|
|
$this->logger->info("- skipping archiving for period '{period}' because processing the period '{periodcheck}' is already in progress.", array('period' => $period, 'periodcheck' => $periodInProgress));
|
|
return true;
|
|
}
|
|
|
|
$cliMulti = $this->makeCliMulti();
|
|
if ($cliMulti->isCommandAlreadyRunning($url)) {
|
|
$this->logArchiveWebsiteAlreadyInProcess($idSite, $period, $date);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
public function isAlreadyArchivingSegment($urlWithSegment, $idSite, $period, $segment)
|
|
{
|
|
// we can check for this or lower period only when the below condition is given. Otherwise the archiver might launch
|
|
// the following requests at once:
|
|
// - week
|
|
// - week segment1
|
|
// - week segment2
|
|
// and it would always skip archiving the segments cause the week was launched first and would be running when
|
|
// it starts them all 3 "at the same time".
|
|
$isProcessingOne = $this->concurrentRequestsPerWebsite == 1;
|
|
|
|
$periodInProgress = $isProcessingOne && $this->isAlreadyArchivingAnyLowerOrThisPeriod($idSite, $period);
|
|
if ($periodInProgress) {
|
|
$this->logger->info("- skipping segment archiving for period '{period}' with segment '{segment}' because processing the period '{periodcheck}' is already in progress.", array('segment' => $segment, 'period' => $period, 'periodcheck' => $periodInProgress));
|
|
return true;
|
|
}
|
|
|
|
$cliMulti = $this->makeCliMulti();
|
|
if ($cliMulti->isCommandAlreadyRunning($urlWithSegment)) {
|
|
$this->logger->info("- skipping segment archiving for '{segment}' because such a process is already in progress.", array('segment' => $segment));
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* @param $idSite
|
|
* @param $period
|
|
* @param $date
|
|
*/
|
|
private function logArchiveWebsiteAlreadyInProcess($idSite, $period, $date)
|
|
{
|
|
$this->logger->info(sprintf(
|
|
"Will not pre-process for website id = %s, period = %s, date = %s because such a process is already in progress.",
|
|
$idSite,
|
|
$period,
|
|
$date
|
|
));
|
|
}
|
|
|
|
private function shouldSkipSegmentArchiving($segment)
|
|
{
|
|
if ($this->disableSegmentsArchiving) {
|
|
return true;
|
|
}
|
|
|
|
return !empty($this->segmentsToForce) && !in_array($segment, $this->segmentsToForce);
|
|
}
|
|
|
|
private function logForcedSegmentInfo()
|
|
{
|
|
if (empty($this->segmentsToForce)) {
|
|
return;
|
|
}
|
|
|
|
$this->logger->info("- Limiting segment archiving to following segments:");
|
|
foreach ($this->segmentsToForce as $segmentDefinition) {
|
|
$this->logger->info(" * " . $segmentDefinition);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return CliMulti
|
|
*/
|
|
private function makeCliMulti()
|
|
{
|
|
/** @var CliMulti $cliMulti */
|
|
$cliMulti = StaticContainer::getContainer()->make('Piwik\CliMulti');
|
|
$cliMulti->setUrlToPiwik($this->urlToPiwik);
|
|
$cliMulti->setPhpCliConfigurationOptions($this->phpCliConfigurationOptions);
|
|
$cliMulti->setAcceptInvalidSSLCertificate($this->acceptInvalidSSLCertificate);
|
|
$cliMulti->setConcurrentProcessesLimit($this->getConcurrentRequestsPerWebsite());
|
|
$cliMulti->runAsSuperUser();
|
|
$cliMulti->onProcessFinish(function ($pid) {
|
|
$this->printPerformanceStatsForProcess($pid);
|
|
});
|
|
return $cliMulti;
|
|
}
|
|
|
|
public function setUrlToPiwik($url)
|
|
{
|
|
$this->urlToPiwik = $url;
|
|
}
|
|
|
|
private function printPerformanceStatsForProcess($childPid)
|
|
{
|
|
if (!$this->isArchiveProfilingEnabled) {
|
|
return;
|
|
}
|
|
|
|
$data = Logger::getMeasurementsFor(getmypid(), $childPid);
|
|
if (empty($data)) {
|
|
return;
|
|
}
|
|
|
|
$message = "";
|
|
foreach ($data as $request => $measurements) {
|
|
$message .= "PERFORMANCE FOR " . $request . "\n ";
|
|
$message .= implode("\n ", $measurements) . "\n";
|
|
}
|
|
$this->logger->info($message);
|
|
}
|
|
}
|