258 lines
8.5 KiB
PHP
258 lines
8.5 KiB
PHP
<?php

require_once '../config.inc.php';

// Everything below is emitted as preformatted UTF-8 HTML.
header('Content-Type: text/html; charset=utf-8');
echo '<pre>';

/*
 * Manual SQL steps that accompany this import script.
 *
 * create table bibel_chapter_2017
 * as
 * SELECT DISTINCT anz_buch, buch, kapitel, NULL as verarbeitet FROM `bibel_lut_1984`;
 *
 * CREATE TABLE bibel_lut_2017 SELECT * FROM `bibel_lut_1984` WHERE 1=2;
 *
 * ALTER TABLE `bibel_chapter_2017` ADD `bid` INT(11) NOT NULL AUTO_INCREMENT FIRST, ADD PRIMARY KEY (`bid`);
 *
 * # The URLs can be found in the source of the dropdown field on die-bibel.de
 * ALTER TABLE `bibel_chapter_2017` ADD `sort` INT(11) NOT NULL AFTER `bid`;
 *
 * # After all data has been fetched, adapt the chapter table for LG-ON
 * ALTER TABLE `bibel_chapter_2017` ADD `buch2` VARCHAR(500) NOT NULL AFTER `verarbeitet`;
 * UPDATE bibel_chapter_2017 set buch2 = concat(anz_buch,' ', buch, ' ',kapitel);
 * UPDATE `bibel_chapter_2017` set buch = buch2;
 * ALTER TABLE `bibel_chapter_2017` DROP `sort`, DROP `anz_buch`, DROP `kapitel`, DROP `buch2`;
 * UPDATE `bibel_chapter_2017` set buch = trim(buch);
 */

// dbconnect() is expected to come from config.inc.php (not visible here).
$db = dbconnect();
$db->query("SET NAMES 'utf8'");

// Pick the chapter with the lowest bid that is not yet flagged 'Y';
// one chapter is handled per script run.
$query = <<<'SQL'
SELECT bid, anz_buch, buch, kapitel, sort
FROM `bibel_chapter_2017`
WHERE (verarbeitet IS NULL or verarbeitet != 'Y')
ORDER BY bid ASC
limit 1

SQL;

// Debug variant (disabled): fetch one specific chapter instead.
// $query = "SELECT bid, anz_buch, buch, kapitel, link
//           FROM `bibel_chapter_2017`
//           WHERE bid =1084
//           ";

$result = $db->query($query);
if (!$result) {
    die("Cannot execute query: result");
}

/*
 * Main import loop: for every chapter row in $result, download the chapter
 * page from die-bibel.de, reduce the HTML to one line per verse / heading,
 * store each line in bibel_lut_2017 and finally flag the chapter as processed.
 *
 * Uses $db and $result, which are set up earlier in this script.
 */

/**
 * Extracts the content of the first element whose opening tag matches
 * $openPattern and re-wraps it in <$wrapTag>.
 *
 * @param string $html        markup to search
 * @param string $openPattern PCRE pattern matching the opening tag
 * @param string $closeTag    literal closing tag that ends the element
 * @param string $wrapTag     tag name used for the returned heading
 * @return string the wrapped heading, or '' if the element is missing or unterminated
 */
$extractHeading = function ($html, $openPattern, $closeTag, $wrapTag) {
    if (!preg_match($openPattern, $html, $match, PREG_OFFSET_CAPTURE)) {
        return '';
    }
    // Content starts right behind the matched opening tag.
    $start = $match[0][1] + strlen($match[0][0]);
    $end = strpos($html, $closeTag, $start);
    if ($end === false) {
        return '';
    }

    return "<{$wrapTag}>" . substr($html, $start, $end - $start) . "</{$wrapTag}>";
};

// NOTE(review): prepare()/bind_param() assume dbconnect() returns a mysqli
// connection (the result object is consumed via fetch_array()) — confirm.
$insertVerse = $db->prepare(
    "INSERT INTO bibel_lut_2017 (anz_buch, buch, kapitel, vers, bibelstelle, inhalt)
     VALUES (?, ?, ?, ?, ?, ?)"
);
$markProcessed = $db->prepare(
    "UPDATE bibel_chapter_2017
        SET verarbeitet = 'Y'
      WHERE anz_buch = ?
        AND buch = ?
        AND kapitel = ?"
);
if (!$insertVerse || !$markProcessed) {
    die("Cannot prepare statements: " . $db->error);
}

// Parameters are bound by reference; assigning new values to these variables
// inside the loop is enough before each execute().
$anzBuch = $buch = $kapitel = $vers = $bibelstelle = $inhalt = '';
$insertVerse->bind_param('ssssss', $anzBuch, $buch, $kapitel, $vers, $bibelstelle, $inhalt);
$markProcessed->bind_param('sss', $anzBuch, $buch, $kapitel);

while ($row = $result->fetch_array()) {
    // Cast to string so NULL columns are written as '' exactly as the
    // former string-interpolated SQL did.
    $anzBuch = (string) $row['anz_buch'];
    $buch = (string) $row['buch'];
    $kapitel = (string) $row['kapitel'];
    $chapterLabel = "$anzBuch $buch $kapitel";

    // For https pages one has to register and log in with the VPN program under Windows.
    $url = "https://www.die-bibel.de/bibeln/online-bibeln/lutherbibel-2017/bibeltext/bibel/text/lesen/stelle/{$row['sort']}/{$row['kapitel']}0001/{$row['kapitel']}9999/";

    // For offline debugging, a saved page can be loaded here instead,
    // e.g. file_get_contents("ps119_bibeltext.htm").
    $html = file_get_contents($url);
    if ($html === false) {
        // Leave the chapter unprocessed so a later run retries it.
        echo "Download failed, chapter left unprocessed: $chapterLabel ($url)<br>";
        break;
    }

    // Keep only the part between the text container and the bottom navigation.
    $html = strstr($html, '<div class="bible-text-container">');
    if ($html !== false) {
        $html = strstr($html, '<nav class="bible-navigation bible-navigation--simple">', true);
    }
    if ($html === false) {
        echo "Bible text markers not found, chapter left unprocessed: $chapterLabel ($url)<br>";
        break;
    }

    // Headings are taken from the raw markup before it is normalised.
    // h[0-9]+ also accepts ids with more than one digit (h10, h11, ...).
    $name = $extractHeading($html, '/<li class="name">/', '</li>', 'h1');
    $h2 = $extractHeading($html, '/<h1 id="h[0-9]+">/', '</h1>', 'h2');
    $h3 = $extractHeading($html, '/<h2 id="h[0-9]+">/', '</h2>', 'h3');

    // Remove all line breaks, then collapse every whitespace run to one space.
    $html = str_replace("\r", '', $html);
    $html = str_replace("\n", '', $html);
    $html = preg_replace('#\s+#', ' ', $html);

    // Drop the chapter number.
    $html = preg_replace('#(<span class="chapter">)(.*?)(</span>)#', '', $html);

    // Start a new line in front of every verse.
    $html = str_replace("<p><span class=\"verse\"", "\n<p><span class=\"verse\"", $html);

    // Turn section headings into <h3> on a line of their own.
    $html = preg_replace('#<h2 id="h[0-9]+">(.*?)</h2>#', '<h3>$1</h3>', $html);
    $html = str_replace("</h3>", "</h3>\n", $html);
    $html = str_replace("<h3>", "\n <h3>", $html);

    // Unwrap the verse spans, keeping their content (the verse number).
    $html = preg_replace('#(<span class="verse".*?>)(.*?)(</span>)#', '$2', $html);

    // Protect markup that must survive strip_tags() behind placeholders:
    // explicit line breaks and the special typography of HERR / HERRN.
    $html = str_replace('<div class="linebreak"></div>', '{LINEBREAK}', $html);
    $html = preg_replace('#(<span class="name-of-deity">)(.*?)(</span>)#', '{NAMEOFDEITY}$2{/NAMEOFDEITY}', $html);

    $html = str_replace("<blockquote>", " ", $html);

    // Remove all remaining tags except the whitelisted ones.
    $html = strip_tags($html, '</p><p><h3><h2><h1><strong><em>');

    // NOTE(review): in the reviewed copy the next pattern and the two
    // verse-number patterns further down showed a plain space where an HTML
    // entity appears to have been decoded by the viewer (one of them read
    // "\ \;", i.e. space + semicolon). "&nbsp;" is restored here — confirm
    // against the repository version and the live markup.
    // "<p>1&nbsp;Wohl denen" --> "1&nbsp;<p>Wohl denen"
    $html = preg_replace('#(<p>)([0-9]{1,3})(&nbsp;)#', '$2$3$1', $html);

    // Restore the protected markup.
    $html = str_replace('{LINEBREAK}', '<div class="linebreak"></div>', $html);
    $html = str_replace('{/NAMEOFDEITY}', '</span>', $html);
    $html = str_replace('{NAMEOFDEITY}', '<span class="name-of-deity">', $html);

    // Remove the space after div tags.
    $html = str_replace("</div> ", "</div>", $html);

    // Merge directly adjacent bold runs (e.g. Matthew 6).
    $html = str_replace("</strong><strong>", " ", $html);

    // Merging bold runs may leave consecutive spaces; collapse them.
    // Only spaces are touched because newlines are significant by now.
    $html = preg_replace('# {2,}#', ' ', $html);

    // Collapse multiple line breaks.
    $html = preg_replace('/(?:[ \t]*(?:\n|\r\n?)){2,}/', "\n", $html);

    // Sirach contains constellations like "8&nbsp;\n[7]": that is not a new
    // line, the bracketed number belongs to verse 8.
    $html = preg_replace('#(&nbsp;)\n(\[)#', '$1$2', $html);

    // Mark the end of the verse number so it can be split off below.
    $html = preg_replace('#([0-9])&nbsp;#', '$1| ', $html);

    $data = explode("\n", $html);

    // The first line holds the mangled page header; drop it. array_shift()
    // reindexes, so no element is skipped when nothing is prepended below.
    array_shift($data);

    // Prepend the headings (innermost first so the final order is h1, h2, h3).
    if ($h3 !== '' && !in_array(" $h3", $data, true)) {
        array_unshift($data, " $h3");
    }
    if ($h2 !== '') {
        array_unshift($data, " $h2");
    }
    if ($name !== '') {
        array_unshift($data, " $name");
    }

    foreach ($data as $line) {
        $parts = explode('| ', $line);
        $bibelstelle = $chapterLabel;

        if (isset($parts[1])) {
            // Verse line: "<number>| <text>".
            $vers = trim($parts[0]);
            $inhalt = trim(html_entity_decode($parts[1], ENT_QUOTES, 'UTF-8'));
            $bibelstelle .= ", $vers";
        } else {
            // Heading or other line without a verse number.
            $vers = '';
            $inhalt = trim(html_entity_decode($parts[0], ENT_QUOTES, 'UTF-8'));
        }

        if (!$insertVerse->execute()) {
            // Abort before the chapter is flagged so the problem is not hidden.
            die("Cannot execute query: insert (" . $insertVerse->error . ")");
        }
    }

    if (!$markProcessed->execute()) {
        die("Cannot execute query: update (" . $markProcessed->error . ")");
    }

    // Pause for a random number of seconds between requests.
    $random = rand(3, 10);
    sleep($random);
    echo "$chapterLabel<br>";
}

$insertVerse->close();
$markProcessed->close();

echo "</pre>";