diff --git a/bridges/InstituteForTheStudyOfWarBridge.php b/bridges/InstituteForTheStudyOfWarBridge.php new file mode 100644 index 00000000..be568dc0 --- /dev/null +++ b/bridges/InstituteForTheStudyOfWarBridge.php @@ -0,0 +1,64 @@ + [ + 'searchURL' => [ + 'name' => 'Filter URL', + 'required' => false, + 'title' => 'Set a filter on https://www.understandingwar.org/publications and copy the URL parameters.' + ], + ] + ]; + + public function collectData() + { + $filter = $this->getInput('searchURL'); + $html = getSimpleHTMLDOM(self::URI . '/publications?' . $filter); + $entries = $html->find('.view-content', 0); + foreach ($entries->find('.views-row') as $entry) { + $this->items[] = $this->processEntry($entry); + } + } + + private function processEntry($entry) + { + $h2 = $entry->find('h2', 0); + $title = $h2->innertext; + $uri = $h2->find('a', 0)->href; + + $date_span = $entry->find('span.datespan', 0); + list($date_string, $user) = explode('-', $date_span->innertext); + $date = DateTime::createFromFormat('F d, Y', trim($date_string)); + + $html = getSimpleHTMLDOMCached(self::URI . $uri); + $content = $html->find('[property=content:encoded]', 0)->innertext; + + $enclosures = []; + $pdfs_list = $html->find('.field-name-field-pdf-report', 0); + if ($pdfs_list != null) { + foreach ($pdfs_list->find('.field-item') as $pdf_item) { + $a = $pdf_item->find('a', 0); + array_push($enclosures, $a->href); + } + } + + return [ + 'uri' => self::URI . $uri, + 'title' => $title, + 'uid' => $uri, + 'author' => trim($user), + 'timestamp' => $date->getTimestamp(), + 'content' => $content, + 'enclosures' => $enclosures + ]; + } +}