Initial commit
This commit is contained in:
620
pma/libraries/classes/Plugins/Import/ImportMediawiki.php
Normal file
620
pma/libraries/classes/Plugins/Import/ImportMediawiki.php
Normal file
@ -0,0 +1,620 @@
|
||||
<?php
|
||||
/**
|
||||
* MediaWiki import plugin for phpMyAdmin
|
||||
*/
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PhpMyAdmin\Plugins\Import;
|
||||
|
||||
use PhpMyAdmin\File;
|
||||
use PhpMyAdmin\Message;
|
||||
use PhpMyAdmin\Plugins\ImportPlugin;
|
||||
use PhpMyAdmin\Properties\Plugins\ImportPluginProperties;
|
||||
use function count;
|
||||
use function explode;
|
||||
use function mb_strlen;
|
||||
use function mb_strpos;
|
||||
use function mb_substr;
|
||||
use function preg_match;
|
||||
use function str_replace;
|
||||
use function strcmp;
|
||||
use function strlen;
|
||||
use function trim;
|
||||
|
||||
/**
|
||||
* Handles the import for the MediaWiki format
|
||||
*/
|
||||
class ImportMediawiki extends ImportPlugin
|
||||
{
|
||||
/**
|
||||
* Whether to analyze tables
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
private $analyze;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
parent::__construct();
|
||||
$this->setProperties();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the import plugin properties.
|
||||
* Called in the constructor.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
protected function setProperties()
|
||||
{
|
||||
$this->setAnalyze(false);
|
||||
if ($GLOBALS['plugin_param'] !== 'table') {
|
||||
$this->setAnalyze(true);
|
||||
}
|
||||
|
||||
$importPluginProperties = new ImportPluginProperties();
|
||||
$importPluginProperties->setText(__('MediaWiki Table'));
|
||||
$importPluginProperties->setExtension('txt');
|
||||
$importPluginProperties->setMimeType('text/plain');
|
||||
$importPluginProperties->setOptions([]);
|
||||
$importPluginProperties->setOptionsText(__('Options'));
|
||||
|
||||
$this->properties = $importPluginProperties;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles the whole import logic
|
||||
*
|
||||
* @param array $sql_data 2-element array with sql data
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function doImport(?File $importHandle = null, array &$sql_data = [])
|
||||
{
|
||||
global $error, $timeout_passed, $finished;
|
||||
|
||||
// Defaults for parser
|
||||
|
||||
// The buffer that will be used to store chunks read from the imported file
|
||||
$buffer = '';
|
||||
|
||||
// Used as storage for the last part of the current chunk data
|
||||
// Will be appended to the first line of the next chunk, if there is one
|
||||
$last_chunk_line = '';
|
||||
|
||||
// Remembers whether the current buffer line is part of a comment
|
||||
$inside_comment = false;
|
||||
// Remembers whether the current buffer line is part of a data comment
|
||||
$inside_data_comment = false;
|
||||
// Remembers whether the current buffer line is part of a structure comment
|
||||
$inside_structure_comment = false;
|
||||
|
||||
// MediaWiki only accepts "\n" as row terminator
|
||||
$mediawiki_new_line = "\n";
|
||||
|
||||
// Initialize the name of the current table
|
||||
$cur_table_name = '';
|
||||
|
||||
$cur_temp_table_headers = [];
|
||||
$cur_temp_table = [];
|
||||
|
||||
$in_table_header = false;
|
||||
|
||||
while (! $finished && ! $error && ! $timeout_passed) {
|
||||
$data = $this->import->getNextChunk($importHandle);
|
||||
|
||||
if ($data === false) {
|
||||
// Subtract data we didn't handle yet and stop processing
|
||||
$GLOBALS['offset'] -= mb_strlen($buffer);
|
||||
break;
|
||||
}
|
||||
|
||||
if ($data !== true) {
|
||||
// Append new data to buffer
|
||||
$buffer = $data;
|
||||
unset($data);
|
||||
// Don't parse string if we're not at the end
|
||||
// and don't have a new line inside
|
||||
if (mb_strpos($buffer, $mediawiki_new_line) === false) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Because of reading chunk by chunk, the first line from the buffer
|
||||
// contains only a portion of an actual line from the imported file.
|
||||
// Therefore, we have to append it to the last line from the previous
|
||||
// chunk. If we are at the first chunk, $last_chunk_line should be empty.
|
||||
$buffer = $last_chunk_line . $buffer;
|
||||
|
||||
// Process the buffer line by line
|
||||
$buffer_lines = explode($mediawiki_new_line, $buffer);
|
||||
|
||||
$full_buffer_lines_count = count($buffer_lines);
|
||||
// If the reading is not finalized, the final line of the current chunk
|
||||
// will not be complete
|
||||
if (! $finished) {
|
||||
$last_chunk_line = $buffer_lines[--$full_buffer_lines_count];
|
||||
}
|
||||
|
||||
for ($line_nr = 0; $line_nr < $full_buffer_lines_count; ++$line_nr) {
|
||||
$cur_buffer_line = trim($buffer_lines[$line_nr]);
|
||||
|
||||
// If the line is empty, go to the next one
|
||||
if ($cur_buffer_line === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
$first_character = $cur_buffer_line[0];
|
||||
$matches = [];
|
||||
|
||||
// Check beginning of comment
|
||||
if (! strcmp(mb_substr($cur_buffer_line, 0, 4), '<!--')) {
|
||||
$inside_comment = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($inside_comment) {
|
||||
// Check end of comment
|
||||
if (! strcmp(mb_substr($cur_buffer_line, 0, 4), '-->')
|
||||
) {
|
||||
// Only data comments are closed. The structure comments
|
||||
// will be closed when a data comment begins (in order to
|
||||
// skip structure tables)
|
||||
if ($inside_data_comment) {
|
||||
$inside_data_comment = false;
|
||||
}
|
||||
|
||||
// End comments that are not related to table structure
|
||||
if (! $inside_structure_comment) {
|
||||
$inside_comment = false;
|
||||
}
|
||||
} else {
|
||||
// Check table name
|
||||
$match_table_name = [];
|
||||
if (preg_match(
|
||||
'/^Table data for `(.*)`$/',
|
||||
$cur_buffer_line,
|
||||
$match_table_name
|
||||
)
|
||||
) {
|
||||
$cur_table_name = $match_table_name[1];
|
||||
$inside_data_comment = true;
|
||||
|
||||
$inside_structure_comment
|
||||
= $this->mngInsideStructComm(
|
||||
$inside_structure_comment
|
||||
);
|
||||
} elseif (preg_match(
|
||||
'/^Table structure for `(.*)`$/',
|
||||
$cur_buffer_line,
|
||||
$match_table_name
|
||||
)
|
||||
) {
|
||||
// The structure comments will be ignored
|
||||
$inside_structure_comment = true;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (preg_match('/^\{\|(.*)$/', $cur_buffer_line, $matches)) {
|
||||
// Check start of table
|
||||
|
||||
// This will store all the column info on all rows from
|
||||
// the current table read from the buffer
|
||||
$cur_temp_table = [];
|
||||
|
||||
// Will be used as storage for the current row in the buffer
|
||||
// Once all its columns are read, it will be added to
|
||||
// $cur_temp_table and then it will be emptied
|
||||
$cur_temp_line = [];
|
||||
|
||||
// Helps us differentiate the header columns
|
||||
// from the normal columns
|
||||
$in_table_header = false;
|
||||
// End processing because the current line does not
|
||||
// contain any column information
|
||||
} elseif (mb_substr($cur_buffer_line, 0, 2) === '|-'
|
||||
|| mb_substr($cur_buffer_line, 0, 2) === '|+'
|
||||
|| mb_substr($cur_buffer_line, 0, 2) === '|}'
|
||||
) {
|
||||
// Check begin row or end table
|
||||
|
||||
// Add current line to the values storage
|
||||
if (! empty($cur_temp_line)) {
|
||||
// If the current line contains header cells
|
||||
// ( marked with '!' ),
|
||||
// it will be marked as table header
|
||||
if ($in_table_header) {
|
||||
// Set the header columns
|
||||
$cur_temp_table_headers = $cur_temp_line;
|
||||
} else {
|
||||
// Normal line, add it to the table
|
||||
$cur_temp_table[] = $cur_temp_line;
|
||||
}
|
||||
}
|
||||
|
||||
// Empty the temporary buffer
|
||||
$cur_temp_line = [];
|
||||
|
||||
// No more processing required at the end of the table
|
||||
if (mb_substr($cur_buffer_line, 0, 2) === '|}') {
|
||||
$current_table = [
|
||||
$cur_table_name,
|
||||
$cur_temp_table_headers,
|
||||
$cur_temp_table,
|
||||
];
|
||||
|
||||
// Import the current table data into the database
|
||||
$this->importDataOneTable($current_table, $sql_data);
|
||||
|
||||
// Reset table name
|
||||
$cur_table_name = '';
|
||||
}
|
||||
// What's after the row tag is now only attributes
|
||||
} elseif (($first_character === '|') || ($first_character === '!')) {
|
||||
// Check cell elements
|
||||
|
||||
// Header cells
|
||||
if ($first_character === '!') {
|
||||
// Mark as table header, but treat as normal row
|
||||
$cur_buffer_line = str_replace('!!', '||', $cur_buffer_line);
|
||||
// Will be used to set $cur_temp_line as table header
|
||||
$in_table_header = true;
|
||||
} else {
|
||||
$in_table_header = false;
|
||||
}
|
||||
|
||||
// Loop through each table cell
|
||||
$cells = $this->explodeMarkup($cur_buffer_line);
|
||||
foreach ($cells as $cell) {
|
||||
$cell = $this->getCellData($cell);
|
||||
|
||||
// Delete the beginning of the column, if there is one
|
||||
$cell = trim($cell);
|
||||
$col_start_chars = [
|
||||
'|',
|
||||
'!',
|
||||
];
|
||||
foreach ($col_start_chars as $col_start_char) {
|
||||
$cell = $this->getCellContent($cell, $col_start_char);
|
||||
}
|
||||
|
||||
// Add the cell to the row
|
||||
$cur_temp_line[] = $cell;
|
||||
}
|
||||
} else {
|
||||
// If it's none of the above, then the current line has a bad
|
||||
// format
|
||||
$message = Message::error(
|
||||
__('Invalid format of mediawiki input on line: <br>%s.')
|
||||
);
|
||||
$message->addParam($cur_buffer_line);
|
||||
$error = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Imports data from a single table
|
||||
*
|
||||
* @param array $table containing all table info:
|
||||
* <code> $table[0] - string
|
||||
* containing table name
|
||||
* $table[1] - array[] of
|
||||
* table headers $table[2] -
|
||||
* array[][] of table content
|
||||
* rows </code>
|
||||
* @param array $sql_data 2-element array with sql data
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @global bool $analyze whether to scan for column types
|
||||
*/
|
||||
private function importDataOneTable(array $table, array &$sql_data)
|
||||
{
|
||||
$analyze = $this->getAnalyze();
|
||||
if ($analyze) {
|
||||
// Set the table name
|
||||
$this->setTableName($table[0]);
|
||||
|
||||
// Set generic names for table headers if they don't exist
|
||||
$this->setTableHeaders($table[1], $table[2][0]);
|
||||
|
||||
// Create the tables array to be used in Import::buildSql()
|
||||
$tables = [];
|
||||
$tables[] = [
|
||||
$table[0],
|
||||
$table[1],
|
||||
$table[2],
|
||||
];
|
||||
|
||||
// Obtain the best-fit MySQL types for each column
|
||||
$analyses = [];
|
||||
$analyses[] = $this->import->analyzeTable($tables[0]);
|
||||
|
||||
$this->executeImportTables($tables, $analyses, $sql_data);
|
||||
}
|
||||
|
||||
// Commit any possible data in buffers
|
||||
$this->import->runQuery('', '', $sql_data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the table name
|
||||
*
|
||||
* @param string $table_name reference to the name of the table
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
private function setTableName(&$table_name)
|
||||
{
|
||||
global $dbi;
|
||||
|
||||
if (! empty($table_name)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$result = $dbi->fetchResult('SHOW TABLES');
|
||||
// todo check if the name below already exists
|
||||
$table_name = 'TABLE ' . (count($result) + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set generic names for table headers, if they don't exist
|
||||
*
|
||||
* @param array $table_headers reference to the array containing the headers
|
||||
* of a table
|
||||
* @param array $table_row array containing the first content row
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
private function setTableHeaders(array &$table_headers, array $table_row)
|
||||
{
|
||||
if (! empty($table_headers)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// The first table row should contain the number of columns
|
||||
// If they are not set, generic names will be given (COL 1, COL 2, etc)
|
||||
$num_cols = count($table_row);
|
||||
for ($i = 0; $i < $num_cols; ++$i) {
|
||||
$table_headers[$i] = 'COL ' . ($i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the database name and additional options and calls Import::buildSql()
|
||||
* Used in PMA_importDataAllTables() and $this->importDataOneTable()
|
||||
*
|
||||
* @param array $tables structure:
|
||||
* array(
|
||||
* array(table_name, array() column_names, array()()
|
||||
* rows)
|
||||
* )
|
||||
* @param array $analyses structure:
|
||||
* $analyses = array(
|
||||
* array(array() column_types, array() column_sizes)
|
||||
* )
|
||||
* @param array $sql_data 2-element array with sql data
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @global string $db name of the database to import in
|
||||
*/
|
||||
private function executeImportTables(array &$tables, array &$analyses, array &$sql_data)
|
||||
{
|
||||
global $db;
|
||||
|
||||
// $db_name : The currently selected database name, if applicable
|
||||
// No backquotes
|
||||
// $options : An associative array of options
|
||||
[$db_name, $options] = $this->getDbnameAndOptions($db, 'mediawiki_DB');
|
||||
|
||||
// Array of SQL strings
|
||||
// Non-applicable parameters
|
||||
$create = null;
|
||||
|
||||
// Create and execute necessary SQL statements from data
|
||||
$this->import->buildSql($db_name, $tables, $analyses, $create, $options, $sql_data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces all instances of the '||' separator between delimiters
|
||||
* in a given string
|
||||
*
|
||||
* @param string $replace the string to be replaced with
|
||||
* @param string $subject the text to be replaced
|
||||
*
|
||||
* @return string with replacements
|
||||
*/
|
||||
private function delimiterReplace($replace, $subject)
|
||||
{
|
||||
// String that will be returned
|
||||
$cleaned = '';
|
||||
// Possible states of current character
|
||||
$inside_tag = false;
|
||||
$inside_attribute = false;
|
||||
// Attributes can be declared with either " or '
|
||||
$start_attribute_character = false;
|
||||
|
||||
// The full separator is "||";
|
||||
// This remembers if the previous character was '|'
|
||||
$partial_separator = false;
|
||||
|
||||
// Parse text char by char
|
||||
for ($i = 0, $iMax = strlen($subject); $i < $iMax; $i++) {
|
||||
$cur_char = $subject[$i];
|
||||
// Check for separators
|
||||
if ($cur_char === '|') {
|
||||
// If we're not inside a tag, then this is part of a real separator,
|
||||
// so we append it to the current segment
|
||||
if (! $inside_attribute) {
|
||||
$cleaned .= $cur_char;
|
||||
if ($partial_separator) {
|
||||
$inside_tag = false;
|
||||
$inside_attribute = false;
|
||||
}
|
||||
} elseif ($partial_separator) {
|
||||
// If we are inside a tag, we replace the current char with
|
||||
// the placeholder and append that to the current segment
|
||||
$cleaned .= $replace;
|
||||
}
|
||||
|
||||
// If the previous character was also '|', then this ends a
|
||||
// full separator. If not, this may be the beginning of one
|
||||
$partial_separator = ! $partial_separator;
|
||||
} else {
|
||||
// If we're inside a tag attribute and the current character is
|
||||
// not '|', but the previous one was, it means that the single '|'
|
||||
// was not appended, so we append it now
|
||||
if ($partial_separator && $inside_attribute) {
|
||||
$cleaned .= '|';
|
||||
}
|
||||
// If the char is different from "|", no separator can be formed
|
||||
$partial_separator = false;
|
||||
|
||||
// any other character should be appended to the current segment
|
||||
$cleaned .= $cur_char;
|
||||
|
||||
if ($cur_char === '<' && ! $inside_attribute) {
|
||||
// start of a tag
|
||||
$inside_tag = true;
|
||||
} elseif ($cur_char === '>' && ! $inside_attribute) {
|
||||
// end of a tag
|
||||
$inside_tag = false;
|
||||
} elseif (($cur_char === '"' || $cur_char == "'") && $inside_tag) {
|
||||
// start or end of an attribute
|
||||
if (! $inside_attribute) {
|
||||
$inside_attribute = true;
|
||||
// remember the attribute`s declaration character (" or ')
|
||||
$start_attribute_character = $cur_char;
|
||||
} else {
|
||||
if ($cur_char == $start_attribute_character) {
|
||||
$inside_attribute = false;
|
||||
// unset attribute declaration character
|
||||
$start_attribute_character = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $cleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Separates a string into items, similarly to explode
|
||||
* Uses the '||' separator (which is standard in the mediawiki format)
|
||||
* and ignores any instances of it inside markup tags
|
||||
* Used in parsing buffer lines containing data cells
|
||||
*
|
||||
* @param string $text text to be split
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
private function explodeMarkup($text)
|
||||
{
|
||||
$separator = '||';
|
||||
$placeholder = "\x00";
|
||||
|
||||
// Remove placeholder instances
|
||||
$text = str_replace($placeholder, '', $text);
|
||||
|
||||
// Replace instances of the separator inside HTML-like
|
||||
// tags with the placeholder
|
||||
$cleaned = $this->delimiterReplace($placeholder, $text);
|
||||
// Explode, then put the replaced separators back in
|
||||
$items = explode($separator, $cleaned);
|
||||
foreach ($items as $i => $str) {
|
||||
$items[$i] = str_replace($placeholder, $separator, $str);
|
||||
}
|
||||
|
||||
return $items;
|
||||
}
|
||||
|
||||
/* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */
|
||||
|
||||
/**
|
||||
* Returns true if the table should be analyzed, false otherwise
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
private function getAnalyze()
|
||||
{
|
||||
return $this->analyze;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets to true if the table should be analyzed, false otherwise
|
||||
*
|
||||
* @param bool $analyze status
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
private function setAnalyze($analyze)
|
||||
{
|
||||
$this->analyze = $analyze;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get cell
|
||||
*
|
||||
* @param string $cell Cell
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
private function getCellData($cell)
|
||||
{
|
||||
// A cell could contain both parameters and data
|
||||
$cell_data = explode('|', $cell, 2);
|
||||
|
||||
// A '|' inside an invalid link should not
|
||||
// be mistaken as delimiting cell parameters
|
||||
if (mb_strpos($cell_data[0], '[[') === false) {
|
||||
return $cell;
|
||||
}
|
||||
|
||||
if (count($cell_data) === 1) {
|
||||
return $cell_data[0];
|
||||
}
|
||||
|
||||
return $cell_data[1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Manage $inside_structure_comment
|
||||
*
|
||||
* @param bool $inside_structure_comment Value to test
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
private function mngInsideStructComm($inside_structure_comment)
|
||||
{
|
||||
// End ignoring structure rows
|
||||
if ($inside_structure_comment) {
|
||||
$inside_structure_comment = false;
|
||||
}
|
||||
|
||||
return $inside_structure_comment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get cell content
|
||||
*
|
||||
* @param string $cell Cell
|
||||
* @param string $col_start_char Start char
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function getCellContent($cell, $col_start_char)
|
||||
{
|
||||
if (mb_strpos($cell, $col_start_char) === 0) {
|
||||
$cell = trim(mb_substr($cell, 1));
|
||||
}
|
||||
|
||||
return $cell;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user