Initial commit
This commit is contained in:
163
fetch_bibel/#func_get_lut1984_diebibel.php
Normal file
163
fetch_bibel/#func_get_lut1984_diebibel.php
Normal file
@ -0,0 +1,163 @@
|
||||
<?php
// Scraper setup: loads the project configuration, opens the database
// connection and selects every chapter that has not been processed yet.
require_once("config.inc.php");

// The script reports its progress as HTML, one line per processed chapter.
header("Content-Type: text/html; charset=utf-8");

/*
 * Schema notes kept from the original author (SQL that was used to prepare
 * the tables). NOTE(review): the table names below are not consistent with
 * each other, and the `cid` / `link` columns read by the query further down
 * are not created here -- verify against the live schema.
 *
 * create table bibel_chapter_1984_html
 * as
 * SELECT DISTINCT anz_buch, buch, kapitel, NULL as verarbeitet FROM `bibel_lut_1984`;
 * ALTER TABLE `bibel_lut_1984_html` ADD PRIMARY KEY(`bid`);
 * ALTER TABLE `bibel_lut_1984_html` CHANGE `bid` `bid` INT(11) NOT NULL AUTO_INCREMENT;
 * ALTER TABLE `bibel_chapter_1984` CHANGE `verarbeitet` `verarbeitet` VARCHAR(1) NULL DEFAULT NULL;
 * ALTER TABLE `bibel_lut_1984_html` CHANGE `anz_buch` `anz_buch` VARCHAR(2) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, CHANGE `buch` `buch` VARCHAR(100) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, CHANGE `vers` `vers` VARCHAR(100) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, CHANGE `bibelstelle` `bibelstelle` VARCHAR(100) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, CHANGE `inhalt` `inhalt` TEXT CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL;
 */

// dbconnect() comes from config.inc.php; presumably it returns a mysqli
// handle (the script uses ->query() and ->fetch_array()) -- TODO confirm.
$db = dbconnect();
$db->query("SET NAMES 'utf8'");

// All chapters that still have to be fetched, in a stable order.
$query = "SELECT cid, anz_buch, buch, kapitel, link
FROM `bibel_chapter_1984_html`
WHERE verarbeitet IS NULL
ORDER BY cid ASC
";

// Debugging aid: process one specific chapter only.
# $query = "SELECT cid, anz_buch, buch, kapitel, link
# FROM `bibel_chapter_1984_html`
# WHERE cid =1084
# ";

$result = $db->query($query);
if (!$result) {
    // Include the driver's error text so a failing query can be diagnosed.
    die("Cannot execute query: " . $db->error);
}
|
||||
|
||||
/**
 * Reduce a downloaded chapter page to a list of text fragments.
 *
 * The part of the page between the bible text container and the simple
 * navigation bar is isolated, flattened to a single line, and then re-split
 * so that every verse and every section heading (<h3>) starts a new line.
 * Only <h1>, <h3>, <strong>, <em>, the "linebreak" <div> and the
 * "name-of-deity" <span> survive as markup.
 *
 * @param string $html Raw HTML of one chapter page.
 * @return array|null  The fragments in document order, or null when the
 *                     expected start/end markers are not present.
 */
function lut1984_extract_chapter_lines($html)
{
    $text = strstr($html, '<div class="bible-text-container">');
    if ($text === false) {
        return null;
    }
    $text = strstr($text, '<nav class="bible-navigation bible-navigation--simple">', true);
    if ($text === false) {
        return null;
    }

    // Drop all line breaks, then collapse every whitespace run to one space.
    $text = str_replace(array("\r", "\n"), '', $text);
    $text = preg_replace('#\s+#', ' ', $text);

    // Start a new line in front of every verse.
    $text = str_replace("<span class=\"verse\"", "\n<span class=\"verse\"", $text);

    // Chapter title: discard the scripture reference, keep the name as <h1>.
    $text = preg_replace('#<li class="scripture">.*?</li>#', '', $text);
    $text = preg_replace('#<li class="name">(.*?)</li>#', '<h1>$1</h1>', $text);

    // Section headings (<h1 id="hN">) become <h3> on a line of their own.
    // The id may have more than one digit.
    $text = preg_replace('#<h1 id="h[0-9]+">(.*?)</h1>#', '<h3>$1</h3>', $text);
    $text = str_replace("</h3>", "</h3>\n", $text);
    $text = str_replace("<h3>", "\n<h3>", $text);

    // Remove the chapter number marker.
    $text = preg_replace('#<span class="chapter">.*?</span>#', '', $text);

    // Unwrap verse spans and paragraphs, keeping their content.
    $text = preg_replace('#<span class="verse".*?>(.*?)</span>#', '$1', $text);
    $text = preg_replace('#<p>(.*?)</p>#', '$1', $text);

    // Protect the markup that must survive strip_tags() with placeholders:
    // explicit line breaks and the special rendering of HERR / HERRN.
    $text = str_replace('<div class="linebreak"></div>', '{LINEBREAK}', $text);
    $text = preg_replace(
        '#<span class="name-of-deity">(.*?)</span>#',
        '{NAMEOFDEITY}$1{/NAMEOFDEITY}',
        $text
    );

    $text = strip_tags($text, '<h3><h1><strong><em>');

    // Restore the protected markup.
    $text = str_replace('{LINEBREAK}', '<div class="linebreak"></div>', $text);
    $text = str_replace('{/NAMEOFDEITY}', '</span>', $text);
    $text = str_replace('{NAMEOFDEITY}', '<span class="name-of-deity">', $text);

    // No space directly after a closing div.
    $text = str_replace("</div> ", "</div>", $text);

    // Merge directly adjacent bold runs (seen e.g. in Matthew 6).
    $text = str_replace("</strong><strong>", " ", $text);

    // Merging bold runs can leave several spaces in a row; collapse them.
    $text = preg_replace('# {2,}#', ' ', $text);

    // Collapse runs of (possibly space-padded) line breaks into one.
    $text = preg_replace('/(?:[ \t]*(?:\n|\r\n?)){2,}/', "\n", $text);

    // Sirach has constellations like "8&nbsp;\n[7]": that is not a new verse,
    // the bracketed number belongs to the preceding verse.
    // NOTE(review): in the copy this was reconstructed from, the separator was
    // rendered as a plain space; "&nbsp;" is assumed to be the original text.
    $text = preg_replace('#(&nbsp;)\n(\[)#', '$1$2', $text);

    return explode("\n", $text);
}

// Separator between a verse number and its text inside a fragment.
// NOTE(review): assumed to be the literal entity "&nbsp;" -- confirm against
// a live page. Headings contain no separator and are stored with an empty
// verse number.
$verseSeparator = '&nbsp;';

// Prepared once and reused for every row; this replaces string-built SQL and
// addslashes() escaping. Assumes $db is a mysqli connection.
$insertVerse = $db->prepare(
    "INSERT INTO bibel_lut_1984_html (anz_buch, buch, kapitel, vers, bibelstelle, inhalt)
     VALUES (?, ?, ?, ?, ?, ?)"
);
$markProcessed = $db->prepare(
    "UPDATE bibel_chapter_1984_html
     SET verarbeitet='Y'
     WHERE anz_buch = ?
     AND buch = ?
     AND kapitel = ?"
);
if (!$insertVerse || !$markProcessed) {
    die("Cannot prepare statements: " . $db->error);
}

while ($row = $result->fetch_array()) {

    #$url= "https://www.die-bibel.de/bibeln/online-bibeln/lutherbibel-1984/bibeltext/bibel/text/lesen/stelle/58/10001/19999/";

    $anzBuch = $row['anz_buch'];
    $buch = $row['buch'];
    $kapitel = $row['kapitel'];

    $page = file_get_contents($row['link']);
    $lines = ($page === false) ? null : lut1984_extract_chapter_lines($page);

    $stored = false;

    if ($lines !== null) {
        // Store the whole chapter atomically where the storage engine
        // supports it, so a retry after a failure cannot duplicate verses.
        $db->begin_transaction();
        $stored = true;

        // The first and the last fragment are skipped, as in the original
        // loop bounds. NOTE(review): presumably they hold the page title and
        // trailing markup -- confirm that the last verse is not lost.
        $last = count($lines) - 1;
        for ($i = 1; $i < $last; $i++) {
            // Limit 2: separators inside the verse text stay in the text.
            $parts = explode($verseSeparator, $lines[$i], 2);
            $bibelstelle = "$anzBuch $buch $kapitel";

            if (isset($parts[1])) {
                $vers = trim($parts[0]);
                $inhalt_neu = trim(html_entity_decode($parts[1], ENT_QUOTES, 'UTF-8'));
                $bibelstelle .= ", $vers";
            } else {
                // No verse number: heading or other unnumbered text.
                $vers = '';
                $inhalt_neu = trim(html_entity_decode($parts[0], ENT_QUOTES, 'UTF-8'));
            }

            $insertVerse->bind_param('ssssss', $anzBuch, $buch, $kapitel, $vers, $bibelstelle, $inhalt_neu);
            if (!$insertVerse->execute()) {
                $stored = false;
                break;
            }
        }

        if ($stored) {
            $markProcessed->bind_param('sss', $anzBuch, $buch, $kapitel);
            $stored = $markProcessed->execute() && $db->commit();
        }
        if (!$stored) {
            $db->rollback();
        }
    }

    // Pause for a random number of seconds between requests.
    $random = rand(3,10);
    sleep($random);

    if ($stored) {
        echo "UPDATE bibel_chapter_1984_html SET verarbeitet='Y' WHERE anz_buch='$row[anz_buch]' AND buch = '$row[buch]' AND kapitel = '$row[kapitel]'\t ->\t $random <br>";
    } else {
        // The chapter stays unprocessed and is picked up again on the next run.
        echo "FAILED (not marked as processed): anz_buch='$row[anz_buch]' AND buch = '$row[buch]' AND kapitel = '$row[kapitel]'\t ->\t $random <br>";
    }
}
|
||||
?>
|
Reference in New Issue
Block a user