PDF removed

aschwarz
2023-01-23 11:03:31 +01:00
parent 82d562a322
commit a6523903eb
28078 changed files with 4247552 additions and 2 deletions


@@ -0,0 +1,65 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\CronArchive;
class FixedSiteIds
{
private $siteIds = array();
private $index = -1;
public function __construct($websiteIds)
{
if (!empty($websiteIds)) {
$this->siteIds = array_values($websiteIds);
}
}
public function getInitialSiteIds()
{
return $this->siteIds;
}
/**
* Get the total number of websites that need to be processed.
*
* @return int
*/
public function getNumSites()
{
return count($this->siteIds);
}
/**
* Get the number of websites processed so far. All of them were processed by the current archiver.
*
* @return int
*/
public function getNumProcessedWebsites()
{
$numProcessed = $this->index + 1;
if ($numProcessed > $this->getNumSites()) {
return $this->getNumSites();
}
return $numProcessed;
}
public function getNextSiteId()
{
$this->index++;
if (!empty($this->siteIds[$this->index])) {
return $this->siteIds[$this->index];
}
return null;
}
}
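
A minimal usage sketch (added for illustration, not part of this commit) showing how the fixed list is consumed; the site IDs are examples:

<?php
use Piwik\CronArchive\FixedSiteIds;

$websites = new FixedSiteIds(array(1, 2, 5)); // example site IDs
echo $websites->getNumSites(); // 3
while (!is_null($siteId = $websites->getNextSiteId())) {
    // ... archive reports for $siteId ...
    echo $websites->getNumProcessedWebsites() . '/' . $websites->getNumSites() . "\n";
}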


@@ -0,0 +1,119 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\CronArchive\Performance;
use Piwik\ArchiveProcessor;
use Piwik\Common;
use Piwik\Config;
use Piwik\Option;
use Piwik\Timer;
use Piwik\Url;
use Psr\Log\LoggerInterface;
class Logger
{
/**
* @var bool
*/
private $isEnabled;
/**
* @var LoggerInterface
*/
private $logger;
/**
* @var int
*/
private $archivingRunId;
public function __construct(Config $config, LoggerInterface $logger = null)
{
$this->isEnabled = $config->Debug['archiving_profile'] == 1;
$this->logger = $logger;
$this->archivingRunId = $this->getArchivingRunId();
if (empty($this->archivingRunId)) {
$this->isEnabled = false;
}
}
public function logMeasurement($category, $name, ArchiveProcessor\Parameters $activeArchivingParams, Timer $timer)
{
if (!$this->isEnabled || !$this->logger) {
return;
}
$measurement = new Measurement($category, $name, $activeArchivingParams->getSite()->getId(),
$activeArchivingParams->getPeriod()->getRangeString(), $activeArchivingParams->getPeriod()->getLabel(),
$activeArchivingParams->getSegment()->getString(), $timer->getTime(), $timer->getMemoryLeakValue(),
$timer->getPeakMemoryValue());
$params = array_merge($_GET);
unset($params['pid']);
unset($params['runid']);
$this->logger->info("[runid={runid},pid={pid}] {request}: {measurement}", [
'pid' => Common::getRequestVar('pid', false),
'runid' => $this->getArchivingRunId(),
'request' => Url::getQueryStringFromParameters($params),
'measurement' => $measurement,
]);
}
public static function getMeasurementsFor($runId, $childPid)
{
$profilingLogFile = preg_replace('/[\'"]/', '', Config::getInstance()->Debug['archive_profiling_log']);
if (!is_readable($profilingLogFile)) {
return [];
}
$runId = self::cleanId($runId);
$childPid = self::cleanId($childPid);
$lineIdentifier = "[runid=$runId,pid=$childPid]";
$lines = `grep "$childPid" "$profilingLogFile"`;
$lines = explode("\n", $lines);
$lines = array_map(function ($line) use ($lineIdentifier) {
$index = strpos($line, $lineIdentifier);
if ($index === false) {
return null;
}
$line = substr($line, $index + strlen($lineIdentifier));
return trim($line);
}, $lines);
$lines = array_filter($lines);
$lines = array_map(function ($line) {
$parts = explode(":", $line, 2);
$parts = array_map('trim', $parts);
return $parts;
}, $lines);
$data = [];
foreach ($lines as $line) {
if (count($line) != 2) {
continue;
}
list($request, $measurement) = $line;
$data[$request][] = $measurement;
}
return $data;
}
private function getArchivingRunId()
{
return Common::getRequestVar('runid', false);
}
private static function cleanId($id)
{
return preg_replace('/[^a-zA-Z0-9_-]/', '', $id);
}
}
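
A sketch of reading measurements back via the static helper (illustrative only; it assumes a bootstrapped Piwik environment with the [Debug] archive_profiling_log path configured, and the run id / child pid values below are made up):

<?php
use Piwik\CronArchive\Performance\Logger;

// hypothetical ids; in practice they come from the parent archiver's request parameters
$measurements = Logger::getMeasurementsFor('12345', '67890');
foreach ($measurements as $request => $entries) {
    echo $request . "\n";
    foreach ($entries as $entry) {
        echo "  " . $entry . "\n";
    }
}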


@@ -0,0 +1,165 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\CronArchive\Performance;
class Measurement
{
/**
* @var string
*/
private $category;
/**
* @var string
*/
private $measuredName;
/**
* @var string
*/
private $idSite;
/**
* @var string
*/
private $dateRange;
/**
* @var string
*/
private $periodType;
/**
* @var string
*/
private $segment;
/**
* @var float
*/
private $time;
/**
* @var string
*/
private $memory;
/**
* @var string
*/
private $peakMemory;
public function __construct($category, $name, $idSite, $dateRange, $periodType, $segment, $time, $memory, $peakMemory)
{
$this->category = $category;
$this->measuredName = $name;
$this->idSite = $idSite;
$this->dateRange = $dateRange;
$this->periodType = $periodType;
$this->segment = trim($segment);
$this->time = $time;
$this->memory = $memory;
$this->peakMemory = $peakMemory;
}
public function __toString()
{
$parts = [
ucfirst($this->category) . ": {$this->measuredName}",
"idSite: {$this->idSite}",
"period: {$this->periodType} ({$this->dateRange})",
"segment: " . (!empty($this->segment) ? $this->segment : 'none'),
"duration: {$this->time}s",
"memory leak: {$this->memory}",
"peak memory usage: {$this->peakMemory}",
];
return implode(', ', $parts);
}
/**
* @return string
*/
public function getCategory()
{
return $this->category;
}
/**
* @param string $category
*/
public function setCategory($category)
{
$this->category = $category;
}
/**
* @return string
*/
public function getMeasuredName()
{
return $this->measuredName;
}
/**
* @param string $measuredName
*/
public function setMeasuredName($measuredName)
{
$this->measuredName = $measuredName;
}
/**
* @return string
*/
public function getIdSite()
{
return $this->idSite;
}
/**
* @param string $idSite
*/
public function setIdSite($idSite)
{
$this->idSite = $idSite;
}
/**
* @return string
*/
public function getDateRange()
{
return $this->dateRange;
}
/**
* @param string $dateRange
*/
public function setDateRange($dateRange)
{
$this->dateRange = $dateRange;
}
/**
* @return string
*/
public function getPeriodType()
{
return $this->periodType;
}
/**
* @param string $periodType
*/
public function setPeriodType($periodType)
{
$this->periodType = $periodType;
}
}
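
Measurement is a plain value object, so its string form can be checked in isolation; the values below are made up for illustration:

<?php
use Piwik\CronArchive\Performance\Measurement;

$m = new Measurement('query', 'archiveDayReports', 1, '2023-01-01,2023-01-31', 'month',
    'browserCode==FF', 0.25, '120 KB', '64 MB');
echo $m;
// Query: archiveDayReports, idSite: 1, period: month (2023-01-01,2023-01-31),
// segment: browserCode==FF, duration: 0.25s, memory leak: 120 KB, peak memory usage: 64 MB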


@@ -0,0 +1,208 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\CronArchive;
use Piwik\Cache\Cache;
use Piwik\Cache\Transient;
use Piwik\Container\StaticContainer;
use Piwik\Date;
use Piwik\Period\Factory as PeriodFactory;
use Piwik\Period\Range;
use Piwik\Plugins\SegmentEditor\Model;
use Psr\Log\LoggerInterface;
/**
* Provides URLs that initiate archiving during cron archiving for segments.
*
* Handles the `[General] process_new_segments_from` INI option.
*/
class SegmentArchivingRequestUrlProvider
{
const BEGINNING_OF_TIME = 'beginning_of_time';
const CREATION_TIME = 'segment_creation_time';
const LAST_EDIT_TIME = 'segment_last_edit_time';
/**
* @var Model
*/
private $segmentEditorModel;
/**
* @var Cache
*/
private $segmentListCache;
/**
* @var Date
*/
private $now;
private $processNewSegmentsFrom;
/**
* @var LoggerInterface
*/
private $logger;
public function __construct($processNewSegmentsFrom, Model $segmentEditorModel = null, Cache $segmentListCache = null,
Date $now = null, LoggerInterface $logger = null)
{
$this->processNewSegmentsFrom = $processNewSegmentsFrom;
$this->segmentEditorModel = $segmentEditorModel ?: new Model();
$this->segmentListCache = $segmentListCache ?: new Transient();
$this->now = $now ?: Date::factory('now');
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
}
public function getUrlParameterDateString($idSite, $period, $date, $segment)
{
$oldestDateToProcessForNewSegment = $this->getOldestDateToProcessForNewSegment($idSite, $segment);
if (empty($oldestDateToProcessForNewSegment)) {
return $date;
}
// if the start date for the archiving request is before the minimum date allowed for processing this segment,
// use the minimum allowed date as the start date
$periodObj = PeriodFactory::build($period, $date);
if ($periodObj->getDateStart()->getTimestamp() < $oldestDateToProcessForNewSegment->getTimestamp()) {
$this->logger->debug("Start date of archiving request period ({start}) is older than configured oldest date to process for the segment.", array(
'start' => $periodObj->getDateStart()
));
$endDate = $periodObj->getDateEnd();
// if the creation time of a segment is newer than the end date of the archiving request range, we cannot
// blindly rewrite the date string, since the resulting range would be incorrect. instead we make the
// start date equal to the end date, so less archiving occurs, and no fatal error occurs.
if ($oldestDateToProcessForNewSegment->getTimestamp() > $endDate->getTimestamp()) {
$this->logger->debug("Oldest date to process is greater than end date of archiving request period ({end}), so setting oldest date to end date.", array(
'end' => $endDate
));
$oldestDateToProcessForNewSegment = $endDate;
}
$date = $oldestDateToProcessForNewSegment->toString().','.$endDate;
$this->logger->debug("Archiving request date range changed to {date} w/ period {period}.", array('date' => $date, 'period' => $period));
}
return $date;
}
private function getOldestDateToProcessForNewSegment($idSite, $segment)
{
/**
* @var Date $segmentCreatedTime
* @var Date $segmentLastEditedTime
*/
list($segmentCreatedTime, $segmentLastEditedTime) = $this->getCreatedTimeOfSegment($idSite, $segment);
if ($this->processNewSegmentsFrom == self::CREATION_TIME) {
$this->logger->debug("process_new_segments_from set to segment_creation_time, oldest date to process is {time}", array('time' => $segmentCreatedTime));
return $segmentCreatedTime;
} elseif ($this->processNewSegmentsFrom == self::LAST_EDIT_TIME) {
$this->logger->debug("process_new_segments_from set to segment_last_edit_time, segment last edit time is {time}",
array('time' => $segmentLastEditedTime));
if ($segmentLastEditedTime === null
|| $segmentLastEditedTime->getTimestamp() < $segmentCreatedTime->getTimestamp()
) {
$this->logger->debug("segment last edit time is older than created time, using created time instead");
$segmentLastEditedTime = $segmentCreatedTime;
}
return $segmentLastEditedTime;
} elseif (preg_match("/^last([0-9]+)$/", $this->processNewSegmentsFrom, $matches)) {
$lastN = $matches[1];
list($lastDate, $lastPeriod) = Range::getDateXPeriodsAgo($lastN, $segmentCreatedTime, 'day');
$result = Date::factory($lastDate);
$this->logger->debug("process_new_segments_from set to last{N}, oldest date to process is {time}", array('N' => $lastN, 'time' => $result));
return $result;
} else {
$this->logger->debug("process_new_segments_from set to beginning_of_time or cannot recognize value");
return null;
}
}
private function getCreatedTimeOfSegment($idSite, $segmentDefinition)
{
$segments = $this->getAllSegments();
/** @var Date $latestEditTime */
$latestEditTime = null;
$earliestCreatedTime = $this->now;
foreach ($segments as $segment) {
if (empty($segment['ts_created'])
|| empty($segment['definition'])
|| !isset($segment['enable_only_idsite'])
) {
continue;
}
if ($this->isSegmentForSite($segment, $idSite)
&& $segment['definition'] == $segmentDefinition
) {
// check for an earlier ts_created timestamp
$createdTime = Date::factory($segment['ts_created']);
if ($createdTime->getTimestamp() < $earliestCreatedTime->getTimestamp()) {
$earliestCreatedTime = $createdTime;
}
// if there is no ts_last_edit timestamp, initialize it to ts_created
if (empty($segment['ts_last_edit'])) {
$segment['ts_last_edit'] = $segment['ts_created'];
}
// check for a later ts_last_edit timestamp
$lastEditTime = Date::factory($segment['ts_last_edit']);
if ($latestEditTime === null
|| $latestEditTime->getTimestamp() < $lastEditTime->getTimestamp()
) {
$latestEditTime = $lastEditTime;
}
}
}
$this->logger->debug(
"Earliest created time of segment '{segment}' w/ idSite = {idSite} is found to be {createdTime}. Latest " .
"edit time is found to be {latestEditTime}.",
array(
'segment' => $segmentDefinition,
'idSite' => $idSite,
'createdTime' => $earliestCreatedTime,
'latestEditTime' => $latestEditTime,
)
);
return array($earliestCreatedTime, $latestEditTime);
}
private function getAllSegments()
{
if (!$this->segmentListCache->contains('all')) {
$segments = $this->segmentEditorModel->getAllSegmentsAndIgnoreVisibility();
$this->segmentListCache->save('all', $segments);
}
return $this->segmentListCache->fetch('all');
}
private function isSegmentForSite($segment, $idSite)
{
return $segment['enable_only_idsite'] == 0
|| $segment['enable_only_idsite'] == $idSite;
}
}
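
An illustrative call (it assumes a bootstrapped Piwik environment, since the default collaborators are the SegmentEditor model and the container's logger; the segment and dates are examples):

<?php
use Piwik\CronArchive\SegmentArchivingRequestUrlProvider;

$provider = new SegmentArchivingRequestUrlProvider(SegmentArchivingRequestUrlProvider::CREATION_TIME);

// if the segment was created after 2023-01-01, the returned date string starts at its creation date instead
$date = $provider->getUrlParameterDateString(1, 'range', '2023-01-01,2023-01-31', 'browserCode==FF');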


@@ -0,0 +1,202 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\CronArchive;
use Exception;
use Piwik\CliMulti\Process;
use Piwik\Option;
/**
* This class saves all siteIds that are to be processed in an Option named 'SharedSiteIdsToArchive' and processes all
* sites within that list. If a user starts multiple archivers, those archivers will help to finish processing that list.
*/
class SharedSiteIds
{
const OPTION_DEFAULT = 'SharedSiteIdsToArchive';
const OPTION_ALL_WEBSITES = 'SharedSiteIdsToArchive_AllWebsites';
/**
* @var string
*/
private $optionName;
private $siteIds = array();
private $currentSiteId;
private $done = false;
private $numWebsitesLeftToProcess;
public function __construct($websiteIds, $optionName = self::OPTION_DEFAULT)
{
$this->optionName = $optionName;
if (empty($websiteIds)) {
$websiteIds = array();
}
$self = $this;
$this->siteIds = $this->runExclusive(function () use ($self, $websiteIds) {
// if sites to be archived are already registered, prefer that existing list, meaning help
// to finish this queue of sites instead of starting a new queue
$existingWebsiteIds = $self->getAllSiteIdsToArchive();
if (!empty($existingWebsiteIds)) {
return $existingWebsiteIds;
}
$self->setSiteIdsToArchive($websiteIds);
return $websiteIds;
});
$this->numWebsitesLeftToProcess = $this->getNumSites();
}
public function getInitialSiteIds()
{
return $this->siteIds;
}
/**
* Get the total number of websites that need to be processed.
*
* @return int
*/
public function getNumSites()
{
return count($this->siteIds);
}
/**
* Get the number of already processed websites (not necessarily all of them were processed by this archiver).
*
* @return int
*/
public function getNumProcessedWebsites()
{
if ($this->done) {
return $this->getNumSites();
}
if (empty($this->currentSiteId)) {
return 0;
}
$index = array_search($this->currentSiteId, $this->siteIds);
if (false === $index) {
return 0;
}
return $index + 1;
}
public function setSiteIdsToArchive($siteIds)
{
if (!empty($siteIds)) {
Option::set($this->optionName, implode(',', $siteIds));
} else {
Option::delete($this->optionName);
}
}
public function getAllSiteIdsToArchive()
{
Option::clearCachedOption($this->optionName);
$siteIdsToArchive = Option::get($this->optionName);
if (empty($siteIdsToArchive)) {
return array();
}
return explode(',', trim($siteIdsToArchive));
}
/**
* If there are multiple archivers running on the same node, this makes sure only one of them performs an action at a
* time, waiting until any other archiver has finished. Any closure you pass here should be very fast, as other
* processes will be waiting for it to finish. Currently only used to make multiple simultaneous archivers work.
* If a closure takes more than 5 seconds we assume it is dead and simply continue.
*
* @param \Closure $closure
* @return mixed
* @throws \Exception
*/
private function runExclusive($closure)
{
$process = new Process('archive.sharedsiteids');
while ($process->isRunning() && $process->getSecondsSinceCreation() < 5) {
// wait max 5 seconds, such an operation should not take longer
usleep(25 * 1000);
}
$process->startProcess();
try {
$result = $closure();
} catch (Exception $e) {
$process->finishProcess();
throw $e;
}
$process->finishProcess();
return $result;
}
/**
* Get the next site id that needs to be processed or null if all site ids were processed.
*
* @return int|null
*/
public function getNextSiteId()
{
if ($this->done) {
// we make sure we don't check again whether there are more sites to be archived as the list of
// sharedSiteIds may have been reset by now.
return null;
}
$self = $this;
$this->currentSiteId = $this->runExclusive(function () use ($self) {
$siteIds = $self->getAllSiteIdsToArchive();
if (empty($siteIds)) {
// done... no sites left to be archived
return null;
}
if (count($siteIds) > $self->numWebsitesLeftToProcess) {
// done... the number of siteIds in SharedSiteIds is larger than it was initially... therefore it must have
// been reset at some point.
return null;
}
$self->numWebsitesLeftToProcess = count($siteIds);
$nextSiteId = array_shift($siteIds);
$self->setSiteIdsToArchive($siteIds);
return $nextSiteId;
});
if (is_null($this->currentSiteId)) {
$this->done = true;
$this->numWebsitesLeftToProcess = 0;
}
return $this->currentSiteId;
}
public static function isSupported()
{
return Process::isSupported();
}
}
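
A sketch of how several archivers share one queue (illustrative; it assumes a bootstrapped Piwik environment, since the queue is persisted via the Option table, and each archiver process would run the same loop):

<?php
use Piwik\CronArchive\SharedSiteIds;

if (SharedSiteIds::isSupported()) {
    $queue = new SharedSiteIds(array(1, 2, 3, 4)); // example site IDs
    while (!is_null($siteId = $queue->getNextSiteId())) {
        // ... archive reports for $siteId; other archivers may consume the same queue concurrently ...
    }
}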


@@ -0,0 +1,40 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\CronArchive;
use Piwik\Concurrency\DistributedList;
/**
* Distributed list that stores the list of IDs of sites whose archives should be reprocessed.
*
* CronArchive will read this list of sites when archiving is being run, and make sure the sites
* are re-archived.
*
* Any class/API method/command/etc. is allowed to add site IDs to this list.
*/
class SitesToReprocessDistributedList extends DistributedList
{
const OPTION_INVALIDATED_IDSITES_TO_REPROCESS = 'InvalidatedOldReports_WebsiteIds';
public function __construct()
{
parent::__construct(self::OPTION_INVALIDATED_IDSITES_TO_REPROCESS);
}
/**
* @inheritdoc
*/
public function setAll($items)
{
$items = array_unique($items, SORT_REGULAR);
$items = array_values($items);
parent::setAll($items);
}
}
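
For example, an invalidation routine might add sites to this list roughly as sketched below (illustrative; getAll() is assumed to come from the DistributedList base class, which is not part of this diff):

<?php
use Piwik\CronArchive\SitesToReprocessDistributedList;

$list = new SitesToReprocessDistributedList();
// mark sites 3 and 7 (example IDs) so the next CronArchive run re-archives them
$list->setAll(array_merge($list->getAll(), array(3, 7)));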