[GovTrackBridge] Rebase on top of official RSS feed (#4429)

This commit is contained in:
July 2025-01-29 05:11:25 -05:00 committed by GitHub
parent bf431a6eae
commit 3ae7a10223
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,6 +1,6 @@
<?php <?php
class GovTrackBridge extends BridgeAbstract class GovTrackBridge extends FeedExpander
{ {
const NAME = 'GovTrack'; const NAME = 'GovTrack';
const MAINTAINER = 'phantop'; const MAINTAINER = 'phantop';
@ -18,64 +18,50 @@ class GovTrackBridge extends BridgeAbstract
'Major Legislative Activity' => 'major-bill-activity', 'Major Legislative Activity' => 'major-bill-activity',
'New Bills and Resolutions' => 'introduced-bills', 'New Bills and Resolutions' => 'introduced-bills',
'New Laws' => 'enacted-bills', 'New Laws' => 'enacted-bills',
'Posts from Us' => 'posts' 'News from Us' => 'posts'
] ]
], ],
'limit' => self::LIMIT 'limit' => self::LIMIT
]]; ]];
public function collectData() public function collectData()
{ {
$html = getSimpleHTMLDOMCached($this->getURI()); $limit = $this->getInput('limit') ?? 15;
if ($this->getInput('feed') != 'posts') { if ($this->getInput('feed') == 'posts') {
$this->collectEvent($html); $this->collectExpandableDatas($this->getURI() . '.rss', $limit);
return; } else {
} $this->collectEvent($this->getURI(), $limit);
$html = defaultLinkTo($html, parent::getURI());
$limit = $this->getInput('limit') ?? 10;
foreach ($html->find('section') as $element) {
if (--$limit == 0) {
break;
}
$info = explode(' ', $element->find('p', 0)->innertext);
$item = [
'categories' => [implode(' ', array_slice($info, 4))],
'timestamp' => strtotime(implode(' ', array_slice($info, 0, 3))),
'title' => $element->find('a', 0)->innertext,
'uri' => $element->find('a', 0)->href,
];
$html = getSimpleHTMLDOMCached($item['uri']);
$html = defaultLinkTo($html, parent::getURI());
$content = $html->find('#content .col-md', 1);
$info = explode(' by ', $content->find('p', 0)->plaintext);
$content->removeChild($content->firstChild());
$item['author'] = implode(' ', array_slice($info, 1));
$item['content'] = $content->innertext;
$this->items[] = $item;
} }
} }
private function collectEvent($html) protected function parseItem(array $item)
{ {
$opt = []; $html = getSimpleHTMLDOMCached($item['uri']);
preg_match('/"csrfmiddlewaretoken" value="(.*)"/', $html, $opt); $html = defaultLinkTo($html, parent::getURI());
$item['categories'] = [$html->find('.breadcrumb-item', 1)->plaintext];
$content = $html->find('#content .col-md', 1);
$item['author'] = explode(' by ', $content->firstChild()->plaintext)[1];
$content->removeChild($content->firstChild());
$item['content'] = $content->innertext;
return $item;
}
private function collectEvent($uri, $limit)
{
$html = getSimpleHTMLDOMCached($uri);
preg_match('/"csrfmiddlewaretoken" value="(.*)"/', $html, $preg);
$header = [ $header = [
"cookie: csrftoken=$opt[1]", "cookie: csrftoken=$preg[1]",
"x-csrftoken: $opt[1]", "x-csrftoken: $preg[1]",
'referer: ' . parent::getURI(), 'referer: ' . parent::getURI(),
]; ];
preg_match('/var selected_feed = "(.*)";/', $html, $opt); preg_match('/var selected_feed = "(.*)";/', $html, $preg);
$post = [ $opt = [ CURLOPT_POSTFIELDS => [
'count' => $this->getInput('limit') ?? 20, 'count' => $limit,
'feed' => $opt[1] 'feed' => $preg[1]
]; ]];
$opt = [ CURLOPT_POSTFIELDS => $post ];
$html = getContents(parent::getURI() . 'events/_load_events', $header, $opt); $html = getContents(parent::getURI() . 'events/_load_events', $header, $opt);
$html = defaultLinkTo(str_get_html($html), parent::getURI()); $html = defaultLinkTo(str_get_html($html), parent::getURI());
@ -83,10 +69,10 @@ class GovTrackBridge extends BridgeAbstract
foreach ($html->find('.tracked_event') as $event) { foreach ($html->find('.tracked_event') as $event) {
$bill = $event->find('.event_title a, .event_body a', 0); $bill = $event->find('.event_title a, .event_body a', 0);
$date = explode(' ', $event->find('.event_date', 0)->plaintext); $date = explode(' ', $event->find('.event_date', 0)->plaintext);
preg_match('/Sponsor:(.*)\n/', $event->plaintext, $opt); preg_match('/Sponsor:(.*)\n/', $event->plaintext, $preg);
$item = [ $item = [
'author' => $opt[1] ?? '', 'author' => $preg[1] ?? '',
'content' => $event->find('td', 1)->innertext, 'content' => $event->find('td', 1)->innertext,
'enclosures' => [$event->find('img', 0)->src], 'enclosures' => [$event->find('img', 0)->src],
'timestamp' => strtotime(implode(' ', array_slice($date, 2))), 'timestamp' => strtotime(implode(' ', array_slice($date, 2))),
@ -115,10 +101,10 @@ class GovTrackBridge extends BridgeAbstract
public function getURI() public function getURI()
{ {
if ($this->getInput('feed') != 'posts') { if ($this->getInput('feed') == 'posts') {
$url = parent::getURI() . 'events/' . $this->getInput('feed');
} else {
$url = parent::getURI() . $this->getInput('feed'); $url = parent::getURI() . $this->getInput('feed');
} else {
$url = parent::getURI() . 'events/' . $this->getInput('feed');
} }
return $url; return $url;
} }