2023-01-23 11:03:31 +01:00

275 lines
10 KiB
PHP

<?php
/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\Plugins\PrivacyManager\Dao;
use Piwik\Common;
use Piwik\Db;
use Piwik\DbHelper;
use Piwik\Network\IP;
use Piwik\Plugins\PrivacyManager\Config;
use Piwik\Plugins\PrivacyManager\IPAnonymizer;
use Piwik\Plugins\PrivacyManager\Tracker\RequestProcessor;
use Piwik\Plugins\UserCountry\LocationProvider;
use Piwik\Plugins\UserCountry\VisitorGeolocator;
use Piwik\SettingsPiwik;
use Piwik\Tracker\Model;
use Exception;
class LogDataAnonymizer
{
/**
 * Maximum number of log_visit rows that anonymizeVisitInformation() selects and processes per batch.
 */
const NUM_ROWS_UPDATE_AT_ONCE = 10000;
/**
 * Column names that getAvailableColumnsWithDefaultValue() always skips, so they are never reported
 * as columns that can be unset.
 *
 * @var string[]
 */
protected $COLUMNS_BLACKLISTED = array('idvisit', 'idvisitor', 'idsite', 'visit_last_action_time', 'config_id', 'location_ip', 'idlink_va', 'server_time', 'idgoal', 'buster', 'idorder');
/**
 * Prefixed name of the log_visit table, resolved once in the constructor.
 *
 * @var string
 */
private $logVisitTable;
/**
 * Resolves the prefixed log_visit table name once so the queries in this class can reuse it.
 */
public function __construct()
{
    $prefixedVisitTable = Common::prefixTable('log_visit');

    $this->logVisitTable = $prefixedVisitTable;
}
/**
 * Anonymizes already tracked visits of the given sites whose visit_last_action_time lies within
 * the given range (both bounds inclusive).
 *
 * Visits are read from log_visit in batches of at most self::NUM_ROWS_UPDATE_AT_ONCE rows, ordered by
 * idsite, visit_last_action_time and idvisit. For every visit the requested changes are collected and,
 * if there is anything to change, written through the tracker model.
 *
 * @param int[]|null $idSites           Site ids to process; when empty, every idsite present in log_visit is used.
 * @param string     $startDate         Lower bound bound against visit_last_action_time.
 * @param string     $endDate           Upper bound bound against visit_last_action_time.
 * @param bool       $anonymizeIp       Whether location_ip is replaced by its masked form.
 * @param bool       $anonimizeLocation Whether the location_* columns are replaced by the location looked up
 *                                      for the masked IP (columns the lookup does not provide become null).
 * @param bool       $anonymizeUserId   Whether a non-empty user_id is replaced by its anonymized form.
 * @return int Number of visits for which at least one column was updated.
 */
public function anonymizeVisitInformation($idSites, $startDate, $endDate, $anonymizeIp, $anonimizeLocation, $anonymizeUserId)
{
    $anythingRequested = $anonymizeIp || $anonimizeLocation || $anonymizeUserId;
    if (!$anythingRequested) {
        return 0; // nothing to do
    }

    $siteIds = empty($idSites)
        ? $this->getAllIdSitesString($this->logVisitTable)
        : array_map('intval', $idSites);

    if (empty($siteIds)) {
        // no visit tracked yet, the idsite in() would otherwise fail
        return 0;
    }

    $siteIdList = implode(', ', $siteIds);

    $totalVisits = $this->getNumVisitsInTimeRange($siteIdList, $startDate, $endDate);
    if (empty($totalVisits)) {
        return 0;
    }

    // the configured mask length is never allowed to drop below 2
    $privacyConfig = new Config();
    $ipMask = max(2, $privacyConfig->ipAddressMaskLength);

    $trackerModel = new Model();
    $geolocator = new VisitorGeolocator();

    // log_visit column => key of the value inside the geolocation result
    $locationKeyByColumn = array(
        'location_longitude' => LocationProvider::LONGITUDE_KEY,
        'location_latitude' => LocationProvider::LATITUDE_KEY,
        'location_city' => LocationProvider::CITY_NAME_KEY,
        'location_region' => LocationProvider::REGION_CODE_KEY,
        'location_country' => LocationProvider::COUNTRY_CODE_KEY,
    );

    $selectTemplate = 'SELECT idsite, idvisit, location_ip, user_id, location_longitude, location_latitude, location_city, location_region, location_country FROM %s WHERE idsite in (%s) and visit_last_action_time >= ? and visit_last_action_time <= ? ORDER BY idsite, visit_last_action_time, idvisit LIMIT %d OFFSET %d';

    $batchSize = self::NUM_ROWS_UPDATE_AT_ONCE;
    $updatedVisits = 0;

    for ($offset = 0; $offset < $totalVisits; $offset += $batchSize) {
        // the last batch only covers the rows left over after all full batches
        $limit = (($offset + $batchSize) > $totalVisits) ? $totalVisits % $batchSize : $batchSize;

        $sql = sprintf($selectTemplate, $this->logVisitTable, $siteIdList, $limit, $offset);
        $visits = Db::query($sql, array($startDate, $endDate))->fetchAll();

        foreach ($visits as $visit) {
            $originalIp = IP::fromBinaryIP($visit['location_ip']);
            $originalIpString = $originalIp->toString();
            $maskedIp = IPAnonymizer::applyIPMask($originalIp, $ipMask);

            $changes = array();

            // only touch the IP when masking actually alters it
            if ($anonymizeIp && $originalIpString !== $maskedIp->toString()) {
                $changes['location_ip'] = $maskedIp->toBinary();
            }

            $hasUserId = isset($visit['user_id']) && $visit['user_id'] !== false && $visit['user_id'] !== '';
            if ($anonymizeUserId && $hasUserId) {
                $changes['user_id'] = RequestProcessor::anonymizeUserId($visit['user_id']);
            }

            if ($anonimizeLocation) {
                // the location is always derived from the masked IP, never from the stored one
                $location = $geolocator->getLocation(array('ip' => $maskedIp->toString()));

                foreach ($locationKeyByColumn as $columnName => $locationKey) {
                    $hasValue = isset($location[$locationKey]) && $location[$locationKey] !== false;
                    $newValue = $hasValue ? $location[$locationKey] : null;

                    if ($newValue !== $visit[$columnName]) {
                        $changes[$columnName] = $newValue;
                    }
                }
            }

            if (empty($changes)) {
                continue;
            }

            $trackerModel->updateVisit($visit['idsite'], $visit['idvisit'], $changes);
            $updatedVisits++;
        }

        // release the batch before the next one is fetched
        unset($visits);
    }

    return $updatedVisits;
}
/**
 * Resets the given log_visit columns to their default values for all rows of the given sites
 * whose visit_last_action_time lies within the given range.
 *
 * @param int[]|null $idSites   Site ids to process; when empty, every idsite present in the table is used.
 * @param string     $startDate Lower bound (inclusive).
 * @param string     $endDate   Upper bound (inclusive).
 * @param string[]   $columns   Names of the columns to reset.
 * @return int Row count reported by the UPDATE statement, or 0 when nothing was executed.
 * @throws Exception When one of the columns cannot be unset or does not exist.
 */
public function unsetLogVisitTableColumns($idSites, $startDate, $endDate, $columns)
{
    $table = 'log_visit';
    $dateColumn = 'visit_last_action_time';

    return $this->unsetLogTableColumns($table, $dateColumn, $idSites, $startDate, $endDate, $columns);
}
/**
 * Resets those of the given visit columns that also exist in log_conversion to their default values,
 * for all rows of the given sites whose server_time lies within the given range.
 *
 * @param int[]|null    $idSites      Site ids to process; when empty, every idsite present in the table is used.
 * @param string        $startDate    Lower bound (inclusive).
 * @param string        $endDate      Upper bound (inclusive).
 * @param string[]|null $visitColumns Names of visit columns; names that are not unsettable columns of
 *                                    log_conversion are silently ignored.
 * @return int Row count reported by the UPDATE statement, or 0 when nothing was executed.
 */
public function unsetLogConversionTableColumns($idSites, $startDate, $endDate, $visitColumns)
{
    if (empty($visitColumns)) {
        // nothing was requested: skip the schema lookup and avoid iterating over a null/empty value
        return 0;
    }

    $table = 'log_conversion';
    $logTableFields = $this->getAvailableColumnsWithDefaultValue(Common::prefixTable($table));

    $columnsToUnset = array();
    foreach ($visitColumns as $column) {
        // we do not fail if a specified column does not exist here as this is applied to visit columns
        // and some visit columns may not exist in log_conversion. We do not want to fail in this case
        if (array_key_exists($column, $logTableFields)) {
            $columnsToUnset[] = $column;
        }
    }

    return $this->unsetLogTableColumns($table, 'server_time', $idSites, $startDate, $endDate, $columnsToUnset);
}
/**
 * Resets the given log_link_visit_action columns to their default values for all rows of the given
 * sites whose server_time lies within the given range.
 *
 * @param int[]|null $idSites   Site ids to process; when empty, every idsite present in the table is used.
 * @param string     $startDate Lower bound (inclusive).
 * @param string     $endDate   Upper bound (inclusive).
 * @param string[]   $columns   Names of the columns to reset.
 * @return int Row count reported by the UPDATE statement, or 0 when nothing was executed.
 * @throws Exception When one of the columns cannot be unset or does not exist.
 */
public function unsetLogLinkVisitActionColumns($idSites, $startDate, $endDate, $columns)
{
    $table = 'log_link_visit_action';
    $dateColumn = 'server_time';

    return $this->unsetLogTableColumns($table, $dateColumn, $idSites, $startDate, $endDate, $columns);
}
/**
 * Verifies that every given column is an unsettable column of log_visit.
 *
 * @param string[] $visitColumns Column names to check; an empty value passes without any lookup.
 * @return null
 * @throws Exception For the first column that does not exist or cannot be unset.
 */
public function checkAllVisitColumns($visitColumns)
{
    $table = 'log_visit';
    $this->areAllColumnsValid($table, $visitColumns);

    return null;
}
/**
 * Verifies that every given column is an unsettable column of log_link_visit_action.
 *
 * @param string[] $linkVisitActionColumns Column names to check; an empty value passes without any lookup.
 * @return null
 * @throws Exception For the first column that does not exist or cannot be unset.
 */
public function checkAllLinkVisitActionColumns($linkVisitActionColumns)
{
    $table = 'log_link_visit_action';
    $this->areAllColumnsValid($table, $linkVisitActionColumns);

    return null;
}
/**
 * Lists the log_visit columns that can be unset.
 *
 * @return array Column name => value the column is reset to.
 */
public function getAvailableVisitColumnsToAnonymize()
{
    $visitTable = Common::prefixTable('log_visit');

    return $this->getAvailableColumnsWithDefaultValue($visitTable);
}
/**
 * Lists the log_link_visit_action columns that can be unset.
 *
 * @return array Column name => value the column is reset to.
 */
public function getAvailableLinkVisitActionColumnsToAnonymize()
{
    $linkVisitActionTable = Common::prefixTable('log_link_visit_action');

    return $this->getAvailableColumnsWithDefaultValue($linkVisitActionTable);
}
/**
 * Ensures that each given column is one of the unsettable columns of the given table.
 *
 * @param string   $table   Unprefixed table name.
 * @param string[] $columns Column names to check; an empty value is accepted without any lookup.
 * @return void
 * @throws Exception For the first column that is not among the unsettable columns of the table.
 */
private function areAllColumnsValid($table, $columns)
{
    if (empty($columns)) {
        return;
    }

    $prefixedTable = Common::prefixTable($table);
    $allowedColumns = $this->getAvailableColumnsWithDefaultValue($prefixedTable);

    foreach ($columns as $candidate) {
        // array_key_exists (not isset) because a column may legitimately map to a null default
        if (array_key_exists($candidate, $allowedColumns)) {
            continue;
        }

        $message = sprintf(
            'The column "%s" seems to not exist in %s or cannot be unset. Use one of %s',
            $candidate,
            $prefixedTable,
            implode(', ', array_keys($allowedColumns))
        );
        throw new Exception($message);
    }
}
/**
 * Resets the given columns of a log table to their default values for all rows of the given sites
 * whose date column lies within the given range (both bounds inclusive).
 *
 * @param string     $table      Unprefixed table name.
 * @param string     $dateColumn Name of the column the date range is applied to.
 * @param int[]|null $idSites    Site ids to process; when empty, every idsite present in the table is used.
 * @param string     $startDate  Lower bound, passed as a bound parameter.
 * @param string     $endDate    Upper bound, passed as a bound parameter.
 * @param string[]   $columns    Names of the columns to reset.
 * @return int Row count reported by the UPDATE statement; 0 when there are no columns or no sites.
 * @throws Exception When a column is not among the unsettable columns of the table.
 */
private function unsetLogTableColumns($table, $dateColumn, $idSites, $startDate, $endDate, $columns)
{
    if (empty($columns)) {
        return 0;
    }

    $prefixedTable = Common::prefixTable($table);

    $siteIds = empty($idSites)
        ? $this->getAllIdSitesString($prefixedTable)
        : array_map('intval', $idSites);

    if (empty($siteIds)) {
        // no visit tracked yet, the idsite in() would otherwise fail
        return 0;
    }

    $siteIdList = implode(', ', $siteIds);
    $defaultByColumn = $this->getAvailableColumnsWithDefaultValue($prefixedTable);

    $assignments = array();
    $parameters = array();
    foreach ($columns as $column) {
        if (!array_key_exists($column, $defaultByColumn)) {
            $message = sprintf(
                'The column "%s" cannot be unset because it has no default value or it does not exist in "%s". Use one of %s',
                $column,
                $prefixedTable,
                implode(', ', array_keys($defaultByColumn))
            );
            throw new Exception($message);
        }

        // each column is set to its default through a bound parameter
        $assignments[] = $column . ' = ?';
        $parameters[] = $defaultByColumn[$column];
    }

    // the two date bounds follow the column defaults in the parameter list
    $parameters[] = $startDate;
    $parameters[] = $endDate;

    $sql = sprintf(
        'UPDATE %s SET %s WHERE idsite in (%s) and %s >= ? and %s <= ?',
        $prefixedTable,
        implode(',', $assignments),
        $siteIdList,
        $dateColumn,
        $dateColumn
    );

    return Db::query($sql, $parameters)->rowCount();
}
/**
 * Counts the log_visit rows of the given sites whose visit_last_action_time lies within the given
 * range (both bounds inclusive).
 *
 * @param string $idSites   Comma separated list of site ids; it is inserted into the SQL as is, so the
 *                          caller must have reduced it to integers already.
 * @param string $startDate Lower bound, passed as a bound parameter.
 * @param string $endDate   Upper bound, passed as a bound parameter.
 * @return int Number of matching visits.
 */
private function getNumVisitsInTimeRange($idSites, $startDate, $endDate)
{
    $sql = sprintf('SELECT count(*) FROM %s WHERE idsite in (%s) and visit_last_action_time >= ? and visit_last_action_time <= ?', $this->logVisitTable, $idSites);

    // fetchColumn() yields the count as a driver-dependent scalar (typically a numeric string) or false
    // when no row is available; normalise it so callers always compare and calculate with an int
    return (int) Db::query($sql, array($startDate, $endDate))->fetchColumn();
}
/**
 * Determines which columns of a table can be unset and the value each of them is reset to.
 *
 * A column is skipped when it is listed in $this->COLUMNS_BLACKLISTED, or when it is declared
 * NOT NULL while its reported default is null.
 *
 * @param string $table Prefixed table name.
 * @return array Column name => reset value (the column default, or null for a nullable column
 *               whose definition carries no 'Default' entry).
 */
private function getAvailableColumnsWithDefaultValue($table)
{
    $resetValues = array();

    foreach (DbHelper::getTableColumns($table) as $name => $definition) {
        $hasDefault = array_key_exists('Default', $definition);

        if (in_array($name, $this->COLUMNS_BLACKLISTED, true)) {
            continue;
        }

        $nullability = strtoupper($definition['Null']);

        if ($hasDefault) {
            if ($nullability === 'NO' && $definition['Default'] === null) {
                // we cannot unset this column as it may result in an error or random data
                continue;
            }

            $resetValues[$name] = $definition['Default'];
            continue;
        }

        if ($nullability === 'YES') {
            $resetValues[$name] = null;
        }
    }

    return $resetValues;
}
/**
 * Collects every distinct idsite stored in the given table.
 *
 * Despite the method name, the result is an array of integers, not a string.
 *
 * @param string $table Prefixed table name.
 * @return int[] Site ids found in the table; empty when the table has no rows.
 */
private function getAllIdSitesString($table)
{
    // we need the idSites in order to use the index
    $query = sprintf('SELECT DISTINCT idsite FROM %s', $table);
    $rows = Db::query($query)->fetchAll();

    return array_map(
        static function ($row) {
            return (int) $row['idsite'];
        },
        $rows
    );
}
}