Compare commits

..

No commits in common. "master" and "2023-07-13" have entirely different histories.

500 changed files with 17034 additions and 25545 deletions

View File

@ -12,6 +12,6 @@ server {
location ~ \.php$ { location ~ \.php$ {
include snippets/fastcgi-php.conf; include snippets/fastcgi-php.conf;
fastcgi_pass unix:/var/run/php/php8.2-fpm.sock; fastcgi_pass 127.0.0.1:9000;
} }
} }

2
.gitattributes vendored
View File

@ -47,6 +47,8 @@ phpcs.xml export-ignore
phpcompatibility.xml export-ignore phpcompatibility.xml export-ignore
tests/ export-ignore tests/ export-ignore
cache/.gitkeep export-ignore cache/.gitkeep export-ignore
bridges/DemoBridge.php export-ignore
bridges/FeedExpanderExampleBridge.php export-ignore
## Composer ## Composer
# #

7
.github/.gitignore vendored
View File

@ -1,7 +0,0 @@
# Visual Studio Code
.vscode/*
# Generated files
comment*.md
comment*.txt
*.html

View File

@ -49,9 +49,9 @@ Please describe what you expect from the bridge. Whenever possible provide sampl
- _Default limit_: 5 - _Default limit_: 5
- [ ] Load full articles - [ ] Load full articles
- _Cache articles_ (articles are stored in a local cache on first request): yes - _Cache articles_ (articles are stored in a local cache on first request): yes
- _Cache timeout_ : 24 hours - _Cache timeout_ (max = 24 hours): 24 hours
- [X] Balance requests (RSS-Bridge uses cached versions to reduce bandwidth usage) - [X] Balance requests (RSS-Bridge uses cached versions to reduce bandwidth usage)
- _Timeout_ (default = 5 minutes): 5 minutes - _Timeout_ (default = 5 minutes, max = 24 hours): 5 minutes
<!--Be aware that some options might not be available for your specific request due to technical limitations!--> <!--Be aware that some options might not be available for your specific request due to technical limitations!-->

235
.github/prtester.py vendored
View File

@ -1,110 +1,59 @@
import argparse
import requests import requests
import re import itertools
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from datetime import datetime from datetime import datetime
from typing import Iterable import os.path
import os
import glob
import urllib
# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge # This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge
# #
# This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of # This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of
# RSS-Bridge, generate a feed for each of the bridges and save the output as html files. # RSS-Bridge, generate a feed for each of the bridges and save the output as html files.
# It also adds a <base> tag with the url of em's public instance, so viewing # It also replaces the default static CSS link with a hardcoded link to @em92's public instance, so viewing
# the HTML file locally will actually work as designed. # the HTML file locally will actually work as designed.
ARTIFACT_FILE_EXTENSION = '.html' def testBridges(bridges,status):
for bridge in bridges:
class Instance: if bridge.get('data-ref'): # Some div entries are empty, this ignores those
name = '' bridgeid = bridge.get('id')
url = ''
def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload: bool, title: str, output_file: str):
start_date = datetime.now()
prid = os.getenv('PR')
artifact_base_url = f'https://rss-bridge.github.io/rss-bridge-tests/prs/{prid}'
artifact_directory = os.getcwd()
for file in glob.glob(f'*{ARTIFACT_FILE_EXTENSION}', root_dir=artifact_directory):
os.remove(file)
table_rows = []
for instance in instances:
page = requests.get(instance.url) # Use python requests to grab the rss-bridge main page
soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
bridge_cards = soup.select('.bridge-card') # get a soup-formatted list of all bridges on the rss-bridge page
table_rows += testBridges(
instance=instance,
bridge_cards=bridge_cards,
with_upload=with_upload,
with_reduced_upload=with_reduced_upload,
artifact_directory=artifact_directory,
artifact_base_url=artifact_base_url) # run the main scraping code with the list of bridges
with open(file=output_file, mode='w+', encoding='utf-8') as file:
table_rows_value = '\n'.join(sorted(table_rows))
file.write(f'''
## {title}
| Bridge | Context | Status |
| - | - | - |
{table_rows_value}
*last change: {start_date.strftime("%A %Y-%m-%d %H:%M:%S")}*
'''.strip())
def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool, artifact_directory: str, artifact_base_url: str) -> Iterable:
instance_suffix = ''
if instance.name:
instance_suffix = f' ({instance.name})'
table_rows = []
for bridge_card in bridge_cards:
bridgeid = bridge_card.get('id')
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
print(f'{bridgeid}{instance_suffix}') print(bridgeid + "\n")
bridge_name = bridgeid.replace('Bridge', '') bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
context_forms = bridge_card.find_all("form") forms = bridge.find_all("form")
form_number = 1 formid = 1
for context_form in context_forms: for form in forms:
# a bridge can have multiple contexts, named 'forms' in html # a bridge can have multiple contexts, named 'forms' in html
# this code will produce a fully working url that should create a working feed when called # this code will produce a fully working formstring that should create a working feed when called
# this will create an example feed for every single context, to test them all # this will create an example feed for every single context, to test them all
context_parameters = {} formstring = ''
error_messages = [] errormessages = []
context_name = '*untitled*' parameters = form.find_all("input")
context_name_element = context_form.find_previous_sibling('h5') lists = form.find_all("select")
if context_name_element and context_name_element.text.strip() != '':
context_name = context_name_element.text
parameters = context_form.find_all("input")
lists = context_form.find_all("select")
# this for/if mess cycles through all available input parameters, checks if it is required, then pulls # this for/if mess cycles through all available input parameters, checks if it is required, then pulls
# the default or examplevalue and then combines it all together into the url parameters # the default or examplevalue and then combines it all together into the formstring
# if an example or default value is missing for a required attribute, it will throw an error # if an example or default value is missing for a required attribute, it will throw an error
# any non-required fields are not tested!!! # any non-required fields are not tested!!!
for parameter in parameters: for parameter in parameters:
parameter_type = parameter.get('type') if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
parameter_name = parameter.get('name') cleanvalue = parameter.get('value').replace(" ","+")
if parameter_type == 'hidden': formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
context_parameters[parameter_name] = parameter.get('value') if parameter.get('type') == 'number' or parameter.get('type') == 'text':
if parameter_type == 'number' or parameter_type == 'text':
if parameter.has_attr('required'): if parameter.has_attr('required'):
if parameter.get('placeholder') == '': if parameter.get('placeholder') == '':
if parameter.get('value') == '': if parameter.get('value') == '':
error_messages.append(f'Missing example or default value for parameter "{parameter_name}"') errormessages.append(parameter.get('name'))
else: else:
context_parameters[parameter_name] = parameter.get('value') formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
else: else:
context_parameters[parameter_name] = parameter.get('placeholder') formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the url parameters # same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
if parameter_type == 'checkbox': if parameter.get('type') == 'checkbox':
if parameter.has_attr('checked'): if parameter.has_attr('checked'):
context_parameters[parameter_name] = 'on' formstring = formstring + '&' + parameter.get('name') + '=on'
for listing in lists: for listing in lists:
selectionvalue = '' selectionvalue = ''
listname = listing.get('name') listname = listing.get('name')
cleanlist = [] cleanlist = []
options = listing.find_all('option') for option in listing.contents:
for option in options:
if 'optgroup' in option.name: if 'optgroup' in option.name:
cleanlist.extend(option) cleanlist.extend(option)
else: else:
@ -118,91 +67,47 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w
if 'selected' in selectionentry.attrs: if 'selected' in selectionentry.attrs:
selectionvalue = selectionentry.get('value') selectionvalue = selectionentry.get('value')
break break
context_parameters[listname] = selectionvalue formstring = formstring + '&' + listname + '=' + selectionvalue
artifact_url = 'about:blank' if not errormessages:
if error_messages: # if all example/default values are present, form the full request string, run the request, replace the static css
status = '<br>'.join(map(lambda m: f'❌ `{m}`', error_messages)) # file with the url of em's public instance and then upload it to termpad.com, a pastebin-like-site.
r = requests.get(URL + bridgestring + formstring)
pagetext = r.text.replace('static/style.css','https://rss-bridge.org/bridge01/static/style.css')
pagetext = pagetext.encode("utf_8")
termpad = requests.post(url="https://termpad.com/", data=pagetext)
termpadurl = termpad.text
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
termpadurl = termpadurl.replace('\n','')
with open(os.getcwd() + '/comment.txt', 'a+') as file:
file.write("\n")
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
else: else:
# if all example/default values are present, form the full request url, run the request, add a <base> tag with # if there are errors (which means that a required value has no example or default value), log out which error appeared
# the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and termpad = requests.post(url="https://termpad.com/", data=str(errormessages))
# then save it to a html file. termpadurl = termpad.text
context_parameters.update({ termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
'action': 'display', termpadurl = termpadurl.replace('\n','')
'bridge': bridgeid, with open(os.getcwd() + '/comment.txt', 'a+') as file:
'format': 'Html', file.write("\n")
}) file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
request_url = f'{instance.url}/?{urllib.parse.urlencode(context_parameters)}' formid += 1
response = requests.get(request_url)
page_text = response.text.replace('<head>','<head><base href="https://rss-bridge.org/bridge01/" target="_blank">')
page_text = page_text.encode("utf_8")
soup = BeautifulSoup(page_text, "html.parser")
status_messages = []
if response.status_code != 200:
status_messages += [f'❌ `HTTP status {response.status_code} {response.reason}`']
else:
feed_items = soup.select('.feeditem')
feed_items_length = len(feed_items)
if feed_items_length <= 0:
status_messages += [f'⚠️ `The feed has no items`']
elif feed_items_length == 1 and len(soup.select('.error')) > 0:
status_messages += [f'❌ `{getFirstLine(feed_items[0].text)}`']
status_messages += map(lambda e: f'❌ `{getFirstLine(e.text)}`', soup.select('.error .error-type') + soup.select('.error .error-message'))
for item_element in soup.select('.feeditem'): # remove all feed items to not accidentally selected <pre> tags from item content
item_element.decompose()
status_messages += map(lambda e: f'⚠️ `{getFirstLine(e.text)}`', soup.find_all('pre'))
status_messages = list(dict.fromkeys(status_messages)) # remove duplicates
status = '<br>'.join(status_messages)
status_is_ok = status == '';
if status_is_ok:
status = '✔️'
if with_upload and (not with_reduced_upload or not status_is_ok):
filename = f'{bridge_name} {form_number}{instance_suffix}{ARTIFACT_FILE_EXTENSION}'
filename = re.sub(r'[^a-z0-9 \_\-\.]', '', filename, flags=re.I).replace(' ', '_')
with open(file=f'{artifact_directory}/{filename}', mode='wb') as file:
file.write(page_text)
artifact_url = f'{artifact_base_url}/{filename}'
table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({artifact_url}) | {status} |')
form_number += 1
return table_rows
def getFirstLine(value: str) -> str: gitstatus = ["current", "pr"]
# trim whitespace and remove text that can break the table or is simply unnecessary now = datetime.now()
clean_value = re.sub(r'^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip()) date_time = now.strftime("%Y-%m-%d, %H:%M:%S")
first_line = next(iter(clean_value.splitlines()), '')
max_length = 250
if (len(first_line) > max_length):
first_line = first_line[:max_length] + '...'
return first_line
if __name__ == '__main__': with open(os.getcwd() + '/comment.txt', 'w+') as file:
parser = argparse.ArgumentParser() file.write(''' ## Pull request artifacts
parser.add_argument('--instances', nargs='+') | file | last change |
parser.add_argument('--no-upload', action='store_true') | ---- | ------ |''')
parser.add_argument('--reduced-upload', action='store_true')
parser.add_argument('--title', default='Pull request artifacts') for status in gitstatus: # run this twice, once for the current version, once for the PR version
parser.add_argument('--output-file', default=os.getcwd() + '/comment.txt') if status == "current":
args = parser.parse_args() port = "3000" # both ports are defined in the corresponding workflow .yml file
instances = [] elif status == "pr":
if args.instances: port = "3001"
for instance_arg in args.instances: URL = "http://localhost:" + port
instance_arg_parts = instance_arg.split('::') page = requests.get(URL) # Use python requests to grab the rss-bridge main page
instance = Instance() soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
instance.name = instance_arg_parts[1].strip() if len(instance_arg_parts) >= 2 else '' bridges = soup.find_all("section") # get a soup-formatted list of all bridges on the rss-bridge page
instance.url = instance_arg_parts[0].strip().rstrip("/") testBridges(bridges,status) # run the main scraping code with the list of bridges and the info if this is for the current version or the pr version
instances.append(instance)
else:
instance = Instance()
instance.name = 'current'
instance.url = 'http://localhost:3000'
instances.append(instance)
instance = Instance()
instance.name = 'pr'
instance.url = 'http://localhost:3001'
instances.append(instance)
main(
instances=instances,
with_upload=not args.no_upload,
with_reduced_upload=args.reduced_upload and not args.no_upload,
title=args.title,
output_file=args.output_file
);

View File

@ -21,7 +21,7 @@ jobs:
- -
name: Docker meta name: Docker meta
id: docker_meta id: docker_meta
uses: docker/metadata-action@v5 uses: docker/metadata-action@v4
with: with:
images: | images: |
${{ env.DOCKERHUB_SLUG }} ${{ env.DOCKERHUB_SLUG }}
@ -33,26 +33,26 @@ jobs:
type=raw,value=stable,enable=${{ startsWith(github.ref, 'refs/tags/20') }} type=raw,value=stable,enable=${{ startsWith(github.ref, 'refs/tags/20') }}
- -
name: Set up QEMU name: Set up QEMU
uses: docker/setup-qemu-action@v3 uses: docker/setup-qemu-action@v2
- -
name: Set up Docker Buildx name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3 uses: docker/setup-buildx-action@v2
- -
name: Login to DockerHub name: Login to DockerHub
uses: docker/login-action@v3 uses: docker/login-action@v2
with: with:
username: ${{ secrets.DOCKER_USERNAME }} username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }} password: ${{ secrets.DOCKER_PASSWORD }}
- -
name: Login to GitHub Container Registry name: Login to GitHub Container Registry
uses: docker/login-action@v3 uses: docker/login-action@v2
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{ github.actor }} username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }} password: ${{ secrets.GITHUB_TOKEN }}
- -
name: Build and push name: Build and push
uses: docker/bake-action@v5 uses: docker/bake-action@v2
with: with:
files: | files: |
./docker-bake.hcl ./docker-bake.hcl

View File

@ -9,7 +9,7 @@ jobs:
documentation: documentation:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
persist-credentials: false persist-credentials: false
- name: Setup PHP - name: Setup PHP

View File

@ -13,7 +13,7 @@ jobs:
matrix: matrix:
php-versions: ['7.4'] php-versions: ['7.4']
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- uses: shivammathur/setup-php@v2 - uses: shivammathur/setup-php@v2
with: with:
php-version: ${{ matrix.php-versions }} php-version: ${{ matrix.php-versions }}
@ -26,7 +26,7 @@ jobs:
matrix: matrix:
php-versions: ['7.4'] php-versions: ['7.4']
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- uses: shivammathur/setup-php@v2 - uses: shivammathur/setup-php@v2
with: with:
php-version: ${{ matrix.php-versions }} php-version: ${{ matrix.php-versions }}
@ -38,7 +38,7 @@ jobs:
executable_php_files_check: executable_php_files_check:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- run: | - run: |
if find -name "*.php" -executable -type f -print -exec false {} + if find -name "*.php" -executable -type f -print -exec false {} +
then then

View File

@ -5,41 +5,24 @@ on:
branches: [ master ] branches: [ master ]
jobs: jobs:
check-bridges:
name: Check if bridges were changed
runs-on: ubuntu-latest
outputs:
BRIDGES: ${{ steps.check1.outputs.BRIDGES }}
steps:
- name: Check number of bridges
id: check1
run: |
PR=${{github.event.number}};
wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch;
bridgeamount=$(cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq | wc -l);
echo "BRIDGES=$bridgeamount" >> "$GITHUB_OUTPUT"
test-pr: test-pr:
name: Generate HTML name: Generate HTML
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: check-bridges
if: needs.check-bridges.outputs.BRIDGES > 0
env:
PYTHONUNBUFFERED: 1
# Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989 # Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989
steps: steps:
- name: Check out self - name: Check out self
uses: actions/checkout@v4 uses: actions/checkout@v3
with: with:
ref: ${{github.event.pull_request.head.ref}} ref: ${{github.event.pull_request.head.ref}}
repository: ${{github.event.pull_request.head.repo.full_name}} repository: ${{github.event.pull_request.head.repo.full_name}}
- name: Check out rss-bridge - name: Check out rss-bridge
run: | run: |
PR=${{github.event.number}}; PR=${{github.event.number}};
wget -O requirements.txt https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester-requirements.txt; wget -O requirements.txt https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester-requirements.txt;
wget https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester.py; wget https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester.py;
wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch; wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch;
touch DEBUG; touch DEBUG;
cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt cat $PR.patch | grep " bridges/.*\.php" | sed "s= bridges/\(.*\)Bridge.php.*=\1=g" | sort | uniq > whitelist.txt
- name: Start Docker - Current - name: Start Docker - Current
run: | run: |
docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3000:80 ghcr.io/rss-bridge/rss-bridge:latest docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3000:80 ghcr.io/rss-bridge/rss-bridge:latest
@ -48,9 +31,9 @@ jobs:
docker build -t prbuild .; docker build -t prbuild .;
docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3001:80 prbuild docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3001:80 prbuild
- name: Setup python - name: Setup python
uses: actions/setup-python@v5 uses: actions/setup-python@v4
with: with:
python-version: '3.13' python-version: '3.7'
cache: 'pip' cache: 'pip'
- name: Install requirements - name: Install requirements
run: | run: |
@ -66,17 +49,9 @@ jobs:
body="${body//$'\n'/'%0A'}"; body="${body//$'\n'/'%0A'}";
body="${body//$'\r'/'%0D'}"; body="${body//$'\r'/'%0D'}";
echo "bodylength=${#body}" >> $GITHUB_OUTPUT echo "bodylength=${#body}" >> $GITHUB_OUTPUT
env:
PR: ${{ github.event.number }}
- name: Upload generated tests
uses: actions/upload-artifact@v4
id: upload-generated-tests
with:
name: tests
path: '*.html'
- name: Find Comment - name: Find Comment
if: ${{ steps.testrun.outputs.bodylength > 130 }} if: ${{ steps.testrun.outputs.bodylength > 130 }}
uses: peter-evans/find-comment@v3 uses: peter-evans/find-comment@v2
id: fc id: fc
with: with:
issue-number: ${{ github.event.pull_request.number }} issue-number: ${{ github.event.pull_request.number }}
@ -84,43 +59,9 @@ jobs:
body-includes: Pull request artifacts body-includes: Pull request artifacts
- name: Create or update comment - name: Create or update comment
if: ${{ steps.testrun.outputs.bodylength > 130 }} if: ${{ steps.testrun.outputs.bodylength > 130 }}
uses: peter-evans/create-or-update-comment@v4 uses: peter-evans/create-or-update-comment@v2
with: with:
comment-id: ${{ steps.fc.outputs.comment-id }} comment-id: ${{ steps.fc.outputs.comment-id }}
issue-number: ${{ github.event.pull_request.number }} issue-number: ${{ github.event.pull_request.number }}
body-file: comment.txt body-file: comment.txt
edit-mode: replace edit-mode: replace
upload_tests:
name: Upload tests
runs-on: ubuntu-latest
needs: test-pr
steps:
- uses: actions/checkout@v4
with:
repository: 'RSS-Bridge/rss-bridge-tests'
ref: 'main'
token: ${{ secrets.RSSTESTER_ACTION }}
- name: Setup git config
run: |
git config --global user.name "GitHub Actions"
git config --global user.email "<>"
- name: Download tests
uses: actions/download-artifact@v4
with:
name: tests
- name: Move tests
run: |
cd prs
mkdir -p ${{github.event.number}}
cd ${{github.event.number}}
mv -f $GITHUB_WORKSPACE/*.html .
- name: Commit and push generated tests
run: |
export COMMIT_MESSAGE="Added tests for PR ${{github.event.number}}"
git add .
git commit -m "$COMMIT_MESSAGE"
git push

View File

@ -13,11 +13,9 @@ jobs:
matrix: matrix:
php-versions: ['7.4', '8.0', '8.1'] php-versions: ['7.4', '8.0', '8.1']
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- uses: shivammathur/setup-php@v2 - uses: shivammathur/setup-php@v2
with: with:
php-version: ${{ matrix.php-versions }} php-version: ${{ matrix.php-versions }}
env:
update: true
- run: composer install - run: composer install
- run: composer test - run: composer test

4
.gitignore vendored
View File

@ -6,6 +6,7 @@ data/
*.pydevproject *.pydevproject
.project .project
.metadata .metadata
bin/
tmp/ tmp/
*.tmp *.tmp
*.bak *.bak
@ -229,9 +230,6 @@ pip-log.txt
DEBUG DEBUG
config.ini.php config.ini.php
config/* config/*
!config/nginx.conf
!config/php-fpm.conf
!config/php.ini
###################### ######################
## VisualStudioCode ## ## VisualStudioCode ##

View File

@ -15,7 +15,7 @@
* [Astalaseven](https://github.com/Astalaseven) * [Astalaseven](https://github.com/Astalaseven)
* [Astyan-42](https://github.com/Astyan-42) * [Astyan-42](https://github.com/Astyan-42)
* [austinhuang0131](https://github.com/austinhuang0131) * [austinhuang0131](https://github.com/austinhuang0131)
* [axor-mst](https://github.com/axor-mst) * [AxorPL](https://github.com/AxorPL)
* [ayacoo](https://github.com/ayacoo) * [ayacoo](https://github.com/ayacoo)
* [az5he6ch](https://github.com/az5he6ch) * [az5he6ch](https://github.com/az5he6ch)
* [b1nj](https://github.com/b1nj) * [b1nj](https://github.com/b1nj)
@ -23,7 +23,6 @@
* [Binnette](https://github.com/Binnette) * [Binnette](https://github.com/Binnette)
* [BoboTiG](https://github.com/BoboTiG) * [BoboTiG](https://github.com/BoboTiG)
* [Bockiii](https://github.com/Bockiii) * [Bockiii](https://github.com/Bockiii)
* [brtsos](https://github.com/brtsos)
* [captn3m0](https://github.com/captn3m0) * [captn3m0](https://github.com/captn3m0)
* [chemel](https://github.com/chemel) * [chemel](https://github.com/chemel)
* [Chouchen](https://github.com/Chouchen) * [Chouchen](https://github.com/Chouchen)
@ -145,7 +144,6 @@
* [Niehztog](https://github.com/Niehztog) * [Niehztog](https://github.com/Niehztog)
* [NikNikYkt](https://github.com/NikNikYkt) * [NikNikYkt](https://github.com/NikNikYkt)
* [Nono-m0le](https://github.com/Nono-m0le) * [Nono-m0le](https://github.com/Nono-m0le)
* [NotsoanoNimus](https://github.com/NotsoanoNimus)
* [obsiwitch](https://github.com/obsiwitch) * [obsiwitch](https://github.com/obsiwitch)
* [Ololbu](https://github.com/Ololbu) * [Ololbu](https://github.com/Ololbu)
* [ORelio](https://github.com/ORelio) * [ORelio](https://github.com/ORelio)

View File

@ -1,72 +1,36 @@
FROM debian:12-slim AS rssbridge FROM lwthiker/curl-impersonate:0.5-ff-slim-buster AS curlimpersonate
FROM php:8.0.27-fpm-buster AS rssbridge
LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one." LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one."
LABEL repository="https://github.com/RSS-Bridge/rss-bridge" LABEL repository="https://github.com/RSS-Bridge/rss-bridge"
LABEL website="https://github.com/RSS-Bridge/rss-bridge" LABEL website="https://github.com/RSS-Bridge/rss-bridge"
ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \
RUN set -xe && \
apt-get update && \
apt-get install --yes --no-install-recommends \ apt-get install --yes --no-install-recommends \
ca-certificates \
nginx \ nginx \
zlib1g-dev \
libzip-dev \
libmemcached-dev \
nss-plugin-pem \ nss-plugin-pem \
php-curl \ libicu-dev && \
php-fpm \ docker-php-ext-install zip && \
php-intl \ docker-php-ext-install intl && \
# php-json is enabled by default with PHP 8.2 in Debian 12 pecl install memcached && \
php-mbstring \ docker-php-ext-enable memcached && \
php-memcached \ docker-php-ext-enable opcache && \
# php-opcache is enabled by default with PHP 8.2 in Debian 12 mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini"
# php-openssl is enabled by default with PHP 8.2 in Debian 12
php-sqlite3 \
php-xml \
php-zip \
# php-zlib is enabled by default with PHP 8.2 in Debian 12
# for downloading libcurl-impersonate
curl \
&& \
# install curl-impersonate library
curlimpersonate_version=0.6.0 && \
{ \
{ \
[ $(arch) = 'aarch64' ] && \
archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \
sha512sum="d04b1eabe71f3af06aa1ce99b39a49c5e1d33b636acedcd9fad163bc58156af5c3eb3f75aa706f335515791f7b9c7a6c40ffdfa47430796483ecef929abd905d" \
; } \
|| { \
[ $(arch) = 'armv7l' ] && \
archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \
sha512sum="05906b4efa1a6ed8f3b716fd83d476b6eea6bfc68e3dbc5212d65a2962dcaa7bd1f938c9096a7535252b11d1d08fb93adccc633585ff8cb8cec5e58bfe969bc9" \
; } \
|| { \
[ $(arch) = 'x86_64' ] && \
archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \
sha512sum="480bbe9452cd9aff2c0daaaf91f1057b3a96385f79011628a9237223757a9b0d090c59cb5982dc54ea0d07191657299ea91ca170a25ced3d7d410fcdff130ace" \
; } \
} && \
curl -LO "https://github.com/lwthiker/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \
echo "$sha512sum $archive" | sha512sum -c - && \
mkdir -p /usr/local/lib/curl-impersonate && \
tar xaf "$archive" -C /usr/local/lib/curl-impersonate --wildcards 'libcurl-impersonate-ff.so*' && \
rm "$archive" && \
apt-get purge --assume-yes curl && \
rm -rf /var/lib/apt/lists/*
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so COPY ./config/nginx.conf /etc/nginx/sites-enabled/default
ENV CURL_IMPERSONATE ff91esr
# logs should go to stdout / stderr
RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \
ln -sfT /dev/stdout /var/log/nginx/access.log; \
chown -R --no-dereference www-data:adm /var/log/nginx/
COPY ./config/nginx.conf /etc/nginx/sites-available/default
COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf
COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini
COPY --chown=www-data:www-data ./ /app/ COPY --chown=www-data:www-data ./ /app/
COPY --from=curlimpersonate /usr/local/lib/libcurl-impersonate-ff.so /usr/local/lib/curl-impersonate/
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so
ENV CURL_IMPERSONATE ff91esr
EXPOSE 80 EXPOSE 80
ENTRYPOINT ["/app/docker-entrypoint.sh"] ENTRYPOINT ["/app/docker-entrypoint.sh"]

420
README.md
View File

@ -2,25 +2,12 @@
![RSS-Bridge](static/logo_600px.png) ![RSS-Bridge](static/logo_600px.png)
RSS-Bridge is a PHP web application. RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one.
It generates web feeds for websites that don't have one.
Officially hosted instance: https://rss-bridge.org/bridge01/
IRC channel #rssbridge at https://libera.chat/
[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html)
Alternatively find another
[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html).
Requires minimum PHP 7.4.
[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE)
[![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest)
[![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) [![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge)
[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#rssbridge:libera.chat)
[![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions) [![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions)
||| |||
@ -28,219 +15,116 @@ Requires minimum PHP 7.4.
|![Screenshot #1](/static/screenshot-1.png?raw=true)|![Screenshot #2](/static/screenshot-2.png?raw=true)| |![Screenshot #1](/static/screenshot-1.png?raw=true)|![Screenshot #2](/static/screenshot-2.png?raw=true)|
|![Screenshot #3](/static/screenshot-3.png?raw=true)|![Screenshot #4](/static/screenshot-4.png?raw=true)| |![Screenshot #3](/static/screenshot-3.png?raw=true)|![Screenshot #4](/static/screenshot-4.png?raw=true)|
|![Screenshot #5](/static/screenshot-5.png?raw=true)|![Screenshot #6](/static/screenshot-6.png?raw=true)| |![Screenshot #5](/static/screenshot-5.png?raw=true)|![Screenshot #6](/static/screenshot-6.png?raw=true)|
|![Screenshot #7](/static/twitter-form.png?raw=true)|![Screenshot #8](/static/twitter-rasmus.png?raw=true)|
## A subset of bridges (15/447) ## A subset of bridges
* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge) * `YouTube` : YouTube user channel, playlist or search
* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge) * `Twitter` : Return keyword/hashtag search or user timeline
* `FeedReducerBridge`: [Reduce a noisy feed by some percentage](https://rss-bridge.org/bridge01/#bridge-FeedReducerBridge) * `Telegram` : Return the latest posts from a public group
* `FilterBridge`: [Filter a feed by excluding/including items by keyword](https://rss-bridge.org/bridge01/#bridge-FilterBridge) * `Reddit` : Return the latest posts from a subreddit or user
* `GettrBridge`: [Fetches the latest posts from a GETTR user](https://rss-bridge.org/bridge01/#bridge-GettrBridge) * `Filter` : Filter an existing feed url
* `MastodonBridge`: [Fetches statuses from a Mastodon (ActivityPub) instance](https://rss-bridge.org/bridge01/#bridge-MastodonBridge) * `Vk` : Latest posts from a user or group
* `RedditBridge`: [Fetches posts from a user/subreddit (with filtering options)](https://rss-bridge.org/bridge01/#bridge-RedditBridge) * `FeedMerge` : Merge two or more existing feeds into one
* `RumbleBridge`: [Fetches channel/user videos](https://rss-bridge.org/bridge01/#bridge-RumbleBridge) * `Twitch` : Fetch the latest videos from a channel
* `SoundcloudBridge`: [Fetches music by username](https://rss-bridge.org/bridge01/#bridge-SoundcloudBridge) * `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords
* `TelegramBridge`: [Fetches posts from a public channel](https://rss-bridge.org/bridge01/#bridge-TelegramBridge)
* `ThePirateBayBridge`: [Fetches torrents by search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge) And [many more](bridges/), thanks to the community!
* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge)
* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge) [Full documentation](https://rss-bridge.github.io/rss-bridge/index.html)
* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge)
* `YoutubeBridge`: [Fetches videos by username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge) Check out RSS-Bridge right now on https://rss-bridge.org/bridge01 or find another
* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's community tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge) [public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html).
## Tutorial ## Tutorial
### How to install on traditional shared web hosting RSS-Bridge requires php 7.4 (or higher).
RSS-Bridge can basically be unzipped into a web folder. Should be working instantly. ### Install with composer or git
Latest zip:
https://github.com/RSS-Bridge/rss-bridge/archive/refs/heads/master.zip (2MB)
### How to install on Debian 12 (nginx + php-fpm)
These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month).
```shell ```shell
timedatectl set-timezone Europe/Oslo
apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl
# Create a user account
useradd --shell /bin/bash --create-home rss-bridge
cd /var/www cd /var/www
composer create-project --no-dev rss-bridge/rss-bridge
# Create folder and change its ownership to rss-bridge
mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/
# Become rss-bridge
su rss-bridge
# Clone master branch into existing folder
git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/
cd rss-bridge
# Copy over the default config (OPTIONAL)
cp -v config.default.ini.php config.ini.php
# Recursively give full permissions to user/owner
chmod 700 --recursive ./
# Give read and execute to others on folder ./static
chmod o+rx ./ ./static
# Recursively give read to others on folder ./static
chmod o+r --recursive ./static
``` ```
Nginx config: ```shell
cd /var/www
git clone https://github.com/RSS-Bridge/rss-bridge.git
```
Config:
```shell
# Give the http user write permission to the cache folder
chown www-data:www-data /var/www/rss-bridge/cache
# Optionally copy over the default config file
cp config.default.ini.php config.ini.php
```
Example config for nginx:
```nginx ```nginx
# /etc/nginx/sites-enabled/rss-bridge.conf # /etc/nginx/sites-enabled/rssbridge
server { server {
listen 80; listen 80;
# TODO: change to your own server name
server_name example.com; server_name example.com;
root /var/www/rss-bridge;
index index.php;
access_log /var/log/nginx/rss-bridge.access.log; location ~ \.php$ {
error_log /var/log/nginx/rss-bridge.error.log;
log_not_found off;
# Intentionally not setting a root folder
# Static content only served here
location /static/ {
alias /var/www/rss-bridge/static/;
}
# Pass off to php-fpm only when location is EXACTLY == /
location = / {
root /var/www/rss-bridge/;
include snippets/fastcgi-php.conf; include snippets/fastcgi-php.conf;
fastcgi_read_timeout 45s; fastcgi_read_timeout 60s;
fastcgi_pass unix:/run/php/rss-bridge.sock; fastcgi_pass unix:/run/php/php-fpm.sock;
}
# Reduce log noise
location = /favicon.ico {
access_log off;
}
# Reduce log noise
location = /robots.txt {
access_log off;
} }
} }
``` ```
PHP FPM pool config: ### Install with Docker:
```ini
; /etc/php/8.2/fpm/pool.d/rss-bridge.conf
[rss-bridge] Install by using docker image from Docker Hub:
user = rss-bridge
group = rss-bridge
listen = /run/php/rss-bridge.sock
listen.owner = www-data
listen.group = www-data
; Create 10 workers standing by to serve requests
pm = static
pm.max_children = 10
; Respawn worker after 500 requests (workaround for memory leaks etc.)
pm.max_requests = 500
```
PHP ini config:
```ini
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
max_execution_time = 15
memory_limit = 64M
```
Restart fpm and nginx:
```shell
# Lint and restart php-fpm
php-fpm8.2 -t && systemctl restart php8.2-fpm
# Lint and restart nginx
nginx -t && systemctl restart nginx
```
### How to install from Composer
Install the latest release.
```shell
cd /var/www
composer create-project -v --no-dev --no-scripts rss-bridge/rss-bridge
```
### How to install with Caddy
TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785
### Install from Docker Hub:
Install by downloading the docker image from Docker Hub:
```bash ```bash
# Create container # Create container
docker create --name=rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rssbridge/rss-bridge docker create --name=rss-bridge --publish 3000:80 rssbridge/rss-bridge
```
You can put custom `config.ini.php` and bridges into `./config`.
**You must restart container for custom changes to take effect.**
See `docker-entrypoint.sh` for details.
```bash
# Start container # Start container
docker start rss-bridge docker start rss-bridge
``` ```
Browse http://localhost:3000/ Browse http://localhost:3000/
### Install by locally building from Dockerfile Install by locally building the image:
```bash ```bash
# Build image from Dockerfile # Build image from Dockerfile
docker build -t rss-bridge . docker build -t rss-bridge .
# Create container # Create container
docker create --name rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rss-bridge docker create --name rss-bridge --publish 3000:80 rss-bridge
```
You can put custom `config.ini.php` and bridges into `./config`. # Start the container
**You must restart container for custom changes to take effect.**
See `docker-entrypoint.sh` for details.
```bash
# Start container
docker start rss-bridge docker start rss-bridge
``` ```
Browse http://localhost:3000/ Browse http://localhost:3000/
### Install with docker-compose (using Docker Hub) #### Install with docker-compose
You can put custom `config.ini.php` and bridges into `./config`. Create a `docker-compose.yml` file locally with the following content:
```yml
version: '2'
services:
rss-bridge:
image: rssbridge/rss-bridge:latest
volumes:
- </local/custom/path>:/config
ports:
- 3000:80
restart: unless-stopped
```
**You must restart container for custom changes to take effect.** Then launch with `docker-compose`:
See `docker-entrypoint.sh` for details.
```bash ```bash
docker-compose up docker-compose up
@ -248,14 +132,13 @@ docker-compose up
Browse http://localhost:3000/ Browse http://localhost:3000/
### Other installation methods ### Alternative installation methods
[![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge) [![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge)
[![Deploy to Heroku](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy) [![Deploy to Heroku](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy)
[![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html) [![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html)
[![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge)
The Heroku quick deploy currently does not work. It might work if you fork this repo and The Heroku quick deploy currently does not work. It might possibly work if you fork this repo and
modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688 modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688
Learn more in Learn more in
@ -263,64 +146,6 @@ Learn more in
## How-to ## How-to
### How to fix "Access denied."
Output is from php-fpm. It is unable to read index.php.
chown rss-bridge:rss-bridge /var/www/rss-bridge/index.php
### How to password-protect the instance (token)
Modify `config.ini.php`:
[authentication]
token = "hunter2"
### How to remove all cache items
As current user:
bin/cache-clear
As user rss-bridge:
sudo -u rss-bridge bin/cache-clear
As root:
sudo bin/cache-clear
### How to remove all expired cache items
bin/cache-prune
### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable"
```shell
# Give rss-bridge ownership
chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache
# Or, give www-data ownership
chown www-data:www-data -R /var/www/rss-bridge/cache
# Or, give everyone write permission
chmod 777 -R /var/www/rss-bridge/cache
# Or last ditch effort (CAREFUL)
rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/
```
### How to fix "attempt to write a readonly database"
The sqlite files (db, wal and shm) are not writeable.
chown -v rss-bridge:rss-bridge cache/*
### How to fix "Unable to prepare statement: 1, no such table: storage"
rm cache/*
### How to create a new bridge from scratch ### How to create a new bridge from scratch
Create the new bridge in e.g. `bridges/BearBlogBridge.php`: Create the new bridge in e.g. `bridges/BearBlogBridge.php`:
@ -361,92 +186,13 @@ enabled_bridges[] = GettrBridge
### How to enable debug mode ### How to enable debug mode
The
[debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html)
disables the majority of caching operations.
enable_debug_mode = true enable_debug_mode = true
### How to switch to memcached as cache backend Learn more in [debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html).
```
[cache]
; Cache backend: file (default), sqlite, memcached, null
type = "memcached"
```
### How to switch to sqlite3 as cache backend
type = "sqlite"
### How to disable bridge errors (as feed items)
When a bridge fails, RSS-Bridge will produce a feed with a single item describing the error.
This way, feed readers pick it up and you are notified.
If you don't want this behaviour, switch the error output to `http`:
[error]
; Defines how error messages are returned by RSS-Bridge
;
; "feed" = As part of the feed (default)
; "http" = As HTTP error message
; "none" = No errors are reported
output = "http"
### How to accumulate errors before finally reporting them
Modify `report_limit` so that an error must occur 3 times before it is reported.
; Defines how often an error must occur before it is reported to the user
report_limit = 3
The report count is reset to 0 each day.
### How to password-protect the instance (HTTP Basic Auth)
[authentication]
enable = true
username = "alice"
password = "cat"
Will typically require feed readers to be configured with the credentials.
It may also be possible to manually include the credentials in the URL:
https://alice:cat@rss-bridge.org/bridge01/?action=display&bridge=FabriceBellardBridge&format=Html
### How to create a new output format ### How to create a new output format
See `formats/PlaintextFormat.php` for an example. [Create a new format](https://rss-bridge.github.io/rss-bridge/Format_API/index.html).
### How to run unit tests and linter
These commands require that you have installed the dev dependencies in `composer.json`.
Run all tests:
./vendor/bin/phpunit
Run a single test class:
./vendor/bin/phpunit --filter UrlTest
Run linter:
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
https://github.com/squizlabs/PHP_CodeSniffer/wiki
### How to spawn a minimal development environment
php -S 127.0.0.1:9001
http://127.0.0.1:9001/
## Explanation ## Explanation
@ -459,18 +205,15 @@ webmaster of
See [CONTRIBUTORS.md](CONTRIBUTORS.md) See [CONTRIBUTORS.md](CONTRIBUTORS.md)
RSS-Bridge uses caching to prevent services from banning your server for repeatedly updating feeds. RSS-Bridge uses caching to prevent services from banning your server for repeatedly updating feeds.
The specific cache duration can be different between bridges. The specific cache duration can be different between bridges. Cached files are deleted automatically after 24 hours.
RSS-Bridge allows you to take full control over which bridges are displayed to the user. RSS-Bridge allows you to take full control over which bridges are displayed to the user.
That way you can host your own RSS-Bridge service with your favorite collection of bridges! That way you can host your own RSS-Bridge service with your favorite collection of bridges!
Current maintainers (as of 2024): @dvikan and @Mynacol #2519
## Reference ## Reference
### Feed item structure ### FeedItem properties
This is the feed item structure that bridges are expected to produce.
```php ```php
$item = [ $item = [
@ -493,22 +236,13 @@ This is the feed item structure that bridges are expected to produce.
] ]
``` ```
### Output formats ### Output formats:
* `Atom`: Atom feed, for use in feed readers * `Atom` : Atom feed, for use in feed readers
* `Html`: Simple HTML page * `Html` : Simple HTML page
* `Json`: JSON, for consumption by other applications * `Json` : JSON, for consumption by other applications
* `Mrss`: MRSS feed, for use in feed readers * `Mrss` : MRSS feed, for use in feed readers
* `Plaintext`: Raw text, for consumption by other applications * `Plaintext` : Raw text, for consumption by other applications
* `Sfeed`: Text, TAB separated
### Cache backends
* `File`
* `SQLite`
* `Memcached`
* `Array`
* `Null`
### Licenses ### Licenses

View File

@ -1,5 +1,17 @@
<?php <?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/** /**
* Checks if the website for a given bridge is reachable. * Checks if the website for a given bridge is reachable.
* *
@ -14,26 +26,23 @@ class ConnectivityAction implements ActionInterface
{ {
private BridgeFactory $bridgeFactory; private BridgeFactory $bridgeFactory;
public function __construct( public function __construct()
BridgeFactory $bridgeFactory {
) { $this->bridgeFactory = new BridgeFactory();
$this->bridgeFactory = $bridgeFactory;
} }
public function __invoke(Request $request): Response public function execute(array $request)
{ {
if (!Debug::isEnabled()) { if (!Debug::isEnabled()) {
return new Response('This action is only available in debug mode!', 403); throw new \Exception('This action is only available in debug mode!');
} }
$bridgeName = $request->get('bridge'); if (!isset($request['bridge'])) {
if (!$bridgeName) { return render_template('connectivity.html.php');
return new Response(render_template('connectivity.html.php'));
}
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
if (!$bridgeClassName) {
return new Response('Bridge not found', 404);
} }
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($request['bridge']);
return $this->reportBridgeConnectivity($bridgeClassName); return $this->reportBridgeConnectivity($bridgeClassName);
} }
@ -43,25 +52,29 @@ class ConnectivityAction implements ActionInterface
throw new \Exception('Bridge is not whitelisted!'); throw new \Exception('Bridge is not whitelisted!');
} }
$bridge = $this->bridgeFactory->create($bridgeClassName); $retVal = [
$curl_opts = [
CURLOPT_CONNECTTIMEOUT => 5,
CURLOPT_FOLLOWLOCATION => true,
];
$result = [
'bridge' => $bridgeClassName, 'bridge' => $bridgeClassName,
'successful' => false, 'successful' => false,
'http_code' => null, 'http_code' => 200,
];
$bridge = $this->bridgeFactory->create($bridgeClassName);
$curl_opts = [
CURLOPT_CONNECTTIMEOUT => 5
]; ];
try { try {
$response = getContents($bridge::URI, [], $curl_opts, true); $reply = getContents($bridge::URI, [], $curl_opts, true);
$result['http_code'] = $response->getCode();
if (in_array($result['http_code'], [200])) { if ($reply['code'] === 200) {
$result['successful'] = true; $retVal['successful'] = true;
if (strpos(implode('', $reply['status_lines']), '301 Moved Permanently')) {
$retVal['http_code'] = 301;
}
} }
} catch (\Exception $e) { } catch (\Exception $e) {
$retVal['successful'] = false;
} }
return new Response(Json::encode($result), 200, ['content-type' => 'text/json']); return new Response(Json::encode($retVal), 200, ['Content-Type' => 'text/json']);
} }
} }

View File

@ -1,51 +1,53 @@
<?php <?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
class DetectAction implements ActionInterface class DetectAction implements ActionInterface
{ {
private BridgeFactory $bridgeFactory; public function execute(array $request)
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{ {
$url = $request->get('url'); $targetURL = $request['url'] ?? null;
$format = $request->get('format'); $format = $request['format'] ?? null;
if (!$url) { if (!$targetURL) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url'])); throw new \Exception('You must specify a url!');
} }
if (!$format) { if (!$format) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format'])); throw new \Exception('You must specify a format!');
} }
foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { $bridgeFactory = new BridgeFactory();
if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
continue; continue;
} }
$bridge = $this->bridgeFactory->create($bridgeClassName); $bridge = $bridgeFactory->create($bridgeClassName);
$bridgeParams = $bridge->detectParameters($url); $bridgeParams = $bridge->detectParameters($targetURL);
if (!$bridgeParams) { if (is_null($bridgeParams)) {
continue; continue;
} }
$query = [ $bridgeParams['bridge'] = $bridgeClassName;
'action' => 'display', $bridgeParams['format'] = $format;
'bridge' => $bridgeClassName,
'format' => $format, $url = '?action=display&' . http_build_query($bridgeParams);
]; return new Response('', 301, ['Location' => $url]);
$query = array_merge($query, $bridgeParams);
return new Response('', 301, ['location' => '?' . http_build_query($query)]);
} }
return new Response(render(__DIR__ . '/../templates/error.html.php', [ throw new \Exception('No bridge found for given URL: ' . $targetURL);
'message' => 'No bridge found for given URL: ' . $url,
]));
} }
} }

View File

@ -1,43 +1,44 @@
<?php <?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
class DisplayAction implements ActionInterface class DisplayAction implements ActionInterface
{ {
private CacheInterface $cache; public function execute(array $request)
private Logger $logger;
private BridgeFactory $bridgeFactory;
public function __construct(
CacheInterface $cache,
Logger $logger,
BridgeFactory $bridgeFactory
) {
$this->cache = $cache;
$this->logger = $logger;
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{ {
$bridgeName = $request->get('bridge'); if (Configuration::getConfig('system', 'enable_maintenance_mode')) {
$format = $request->get('format'); return new Response('503 Service Unavailable', 503);
$noproxy = $request->get('_noproxy');
if (!$bridgeName) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge name parameter']), 400);
}
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
if (!$bridgeClassName) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404);
} }
$bridgeFactory = new BridgeFactory();
$bridgeClassName = $bridgeFactory->createBridgeClassName($request['bridge'] ?? '');
$format = $request['format'] ?? null;
if (!$format) { if (!$format) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']), 400); throw new \Exception('You must specify a format!');
} }
if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { if (!$bridgeFactory->isEnabled($bridgeClassName)) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400); throw new \Exception('This bridge is not whitelisted');
} }
// Disable proxy (if enabled and per user's request) $formatFactory = new FormatFactory();
$format = $formatFactory->create($format);
$bridge = $bridgeFactory->create($bridgeClassName);
$bridge->loadConfiguration();
$noproxy = $request['_noproxy'] ?? null;
if ( if (
Configuration::getConfig('proxy', 'url') Configuration::getConfig('proxy', 'url')
&& Configuration::getConfig('proxy', 'by_bridge') && Configuration::getConfig('proxy', 'by_bridge')
@ -47,135 +48,176 @@ class DisplayAction implements ActionInterface
define('NOPROXY', true); define('NOPROXY', true);
} }
$cacheKey = 'http_' . json_encode($request->toArray()); $cacheTimeout = $request['_cache_timeout'] ?? null;
if (Configuration::getConfig('cache', 'custom_timeout') && $cacheTimeout) {
$bridge = $this->bridgeFactory->create($bridgeClassName); $cacheTimeout = (int) $cacheTimeout;
$response = $this->createResponse($request, $bridge, $format);
if ($response->getCode() === 200) {
$ttl = $request->get('_cache_timeout');
if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) {
$ttl = (int) $ttl;
} else { } else {
$ttl = $bridge->getCacheTimeout(); // At this point the query argument might still be in the url but it won't be used
} $cacheTimeout = $bridge->getCacheTimeout();
$this->cache->set($cacheKey, $response, $ttl);
} }
return $response;
}
private function createResponse(Request $request, BridgeAbstract $bridge, string $format)
{
$items = [];
try {
$bridge->loadConfiguration();
// Remove parameters that don't concern bridges // Remove parameters that don't concern bridges
$remove = [ $bridge_params = array_diff_key(
'token', $request,
array_fill_keys(
[
'action', 'action',
'bridge', 'bridge',
'format', 'format',
'_noproxy', '_noproxy',
'_cache_timeout', '_cache_timeout',
'_error_time', '_error_time'
'_', // Some RSS readers add a cache-busting parameter (_=<timestamp>) to feed URLs, detect and ignore them. ],
]; ''
$requestArray = $request->toArray(); )
$input = array_diff_key($requestArray, array_fill_keys($remove, '')); );
$bridge->setInput($input);
$bridge->collectData(); // Remove parameters that don't concern caches
$items = $bridge->getItems(); $cache_params = array_diff_key(
} catch (\Throwable $e) { $request,
if ($e instanceof RateLimitException) { array_fill_keys(
// These are internally generated by bridges [
$this->logger->info(sprintf('RateLimitException in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); 'action',
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 429); 'format',
'_noproxy',
'_cache_timeout',
'_error_time'
],
''
)
);
$cache = RssBridge::getCache();
$cache->setScope('');
$cache->setKey($cache_params);
// This cache purge will basically delete all cache items older than 24h, regardless of scope and key
$cache->purgeCache(86400);
$items = [];
$infos = [];
$mtime = $cache->getTime();
if (
$mtime
&& (time() - $cacheTimeout < $mtime)
&& !Debug::isEnabled()
) {
// At this point we found the feed in the cache and debug mode is disabled
if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) {
// The client wants to know if the feed has changed since its last check
$stime = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']);
if ($mtime <= $stime) {
$lastModified2 = gmdate('D, d M Y H:i:s ', $mtime) . 'GMT';
return new Response('', 304, ['Last-Modified' => $lastModified2]);
} }
if ($e instanceof HttpException) {
if (in_array($e->getCode(), [429, 503])) {
// Log with debug, immediately reproduce and return
$this->logger->debug(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), $e->getCode());
} }
// Some other status code which we let fail normally (but don't log it)
// Load the feed from cache and prepare it
$cached = $cache->loadData();
if (isset($cached['items']) && isset($cached['extraInfos'])) {
foreach ($cached['items'] as $item) {
$items[] = new FeedItem($item);
}
$infos = $cached['extraInfos'];
}
} else { } else {
// Log error if it's not an HttpException // At this point we did NOT find the feed in the cache or debug mode is enabled.
$this->logger->error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]); try {
$bridge->setDatas($bridge_params);
$bridge->collectData();
$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = new FeedItem($item);
} }
$errorOutput = Configuration::getConfig('error', 'output'); $items = $feedItems;
$reportLimit = Configuration::getConfig('error', 'report_limit');
$errorCount = 1;
if ($reportLimit > 1) {
$errorCount = $this->logBridgeError($bridge->getName(), $e->getCode());
} }
// Let clients know about the error if we are past the report limit $infos = [
if ($errorCount >= $reportLimit) { 'name' => $bridge->getName(),
if ($errorOutput === 'feed') { 'uri' => $bridge->getURI(),
// Render the exception as a feed item 'donationUri' => $bridge->getDonationURI(),
$items = [$this->createFeedItemFromException($e, $bridge)]; 'icon' => $bridge->getIcon()
} elseif ($errorOutput === 'http') { ];
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 500); } catch (\Throwable $e) {
} elseif ($errorOutput === 'none') { if ($e instanceof HttpException) {
// Do nothing (produces an empty feed) // Produce a smaller log record for http exceptions
Logger::warning(sprintf('Exception in %s: %s', $bridgeClassName, create_sane_exception_message($e)));
} else {
// Log the exception
Logger::error(sprintf('Exception in %s', $bridgeClassName), ['e' => $e]);
}
// Emit error only if we are past the error report limit
$errorCount = self::logBridgeError($bridge->getName(), $e->getCode());
if ($errorCount >= Configuration::getConfig('error', 'report_limit')) {
if (Configuration::getConfig('error', 'output') === 'feed') {
// Emit the error as a feed item in a feed so that feed readers can pick it up
$items[] = $this->createFeedItemFromException($e, $bridge);
} elseif (Configuration::getConfig('error', 'output') === 'http') {
// Emit as a regular web response
throw $e;
} }
} }
} }
$formatFactory = new FormatFactory(); // Unfortunately need to set scope and key again because they might be modified
$format = $formatFactory->create($format); $cache->setScope('');
$cache->setKey($cache_params);
$cache->saveData([
'items' => array_map(function (FeedItem $item) {
return $item->toArray();
}, $items),
'extraInfos' => $infos
]);
}
$format->setItems($items); $format->setItems($items);
$format->setFeed($bridge->getFeed()); $format->setExtraInfos($infos);
$now = time(); $lastModified = $cache->getTime();
$format->setLastModified($now); $format->setLastModified($lastModified);
$headers = [ $headers = [];
'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT', if ($lastModified) {
'content-type' => $format->getMimeType() . '; charset=UTF-8', $headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $lastModified) . 'GMT';
]; }
$body = $format->render(); $headers['Content-Type'] = $format->getMimeType() . '; charset=' . $format->getCharset();
return new Response($format->stringify(), 200, $headers);
// This is supposed to remove non-utf8 byte sequences, but I'm unsure if it works
ini_set('mbstring.substitute_character', 'none');
$body = mb_convert_encoding($body, 'UTF-8', 'UTF-8');
return new Response($body, 200, $headers);
} }
private function createFeedItemFromException($e, BridgeAbstract $bridge): array private function createFeedItemFromException($e, BridgeInterface $bridge): FeedItem
{ {
$item = []; $item = new FeedItem();
// Create a unique identifier every 24 hours // Create a unique identifier every 24 hours
$uniqueIdentifier = urlencode((int)(time() / 86400)); $uniqueIdentifier = urlencode((int)(time() / 86400));
$title = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier); $itemTitle = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier);
$item->setTitle($itemTitle);
$item['title'] = $title; $item->setURI(get_current_url());
$item['uri'] = get_current_url(); $item->setTimestamp(time());
$item['timestamp'] = time();
// Create an item identifier for feed readers e.g. "staysafetv twitch videos_19389" // Create an item identifier for feed readers e.g. "staysafetv twitch videos_19389"
$item['uid'] = $bridge->getName() . '_' . $uniqueIdentifier; $item->setUid($bridge->getName() . '_' . $uniqueIdentifier);
$content = render_template(__DIR__ . '/../templates/bridge-error.html.php', [ $content = render_template(__DIR__ . '/../templates/bridge-error.html.php', [
'error' => render_template(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 'error' => render_template(__DIR__ . '/../templates/error.html.php', ['e' => $e]),
'searchUrl' => self::createGithubSearchUrl($bridge), 'searchUrl' => self::createGithubSearchUrl($bridge),
'issueUrl' => self::createGithubIssueUrl($bridge, $e), 'issueUrl' => self::createGithubIssueUrl($bridge, $e, create_sane_exception_message($e)),
'maintainer' => $bridge->getMaintainer(), 'maintainer' => $bridge->getMaintainer(),
]); ]);
$item['content'] = $content; $item->setContent($content);
return $item; return $item;
} }
private function logBridgeError($bridgeName, $code) private static function logBridgeError($bridgeName, $code)
{ {
// todo: it's not really necessary to json encode $report $cache = RssBridge::getCache();
$cacheKey = 'error_reporting_' . $bridgeName . '_' . $code; $cache->setScope('error_reporting');
$report = $this->cache->get($cacheKey); $cache->setkey([$bridgeName . '_' . $code]);
if ($report) {
if ($report = $cache->loadData()) {
$report = Json::decode($report); $report = Json::decode($report);
$report['time'] = time(); $report['time'] = time();
$report['count']++; $report['count']++;
@ -186,39 +228,26 @@ class DisplayAction implements ActionInterface
'count' => 1, 'count' => 1,
]; ];
} }
$ttl = 86400 * 5; $cache->saveData(Json::encode($report));
$this->cache->set($cacheKey, Json::encode($report), $ttl);
return $report['count']; return $report['count'];
} }
private static function createGithubIssueUrl(BridgeAbstract $bridge, \Throwable $e): string private static function createGithubIssueUrl($bridge, $e, string $message): string
{ {
$maintainer = $bridge->getMaintainer(); return sprintf('https://github.com/RSS-Bridge/rss-bridge/issues/new?%s', http_build_query([
if (str_contains($maintainer, ',')) { 'title' => sprintf('%s failed with error %s', $bridge->getName(), $e->getCode()),
$maintainers = explode(',', $maintainer);
} else {
$maintainers = [$maintainer];
}
$maintainers = array_map('trim', $maintainers);
$queryString = $_SERVER['QUERY_STRING'] ?? '';
$query = [
'title' => $bridge->getName() . ' failed with: ' . $e->getMessage(),
'body' => sprintf( 'body' => sprintf(
"```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```\nMaintainer: @%s", "```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```",
create_sane_exception_message($e), $message,
implode("\n", trace_to_call_points(trace_from_exception($e))), implode("\n", trace_to_call_points(trace_from_exception($e))),
$queryString, $_SERVER['QUERY_STRING'] ?? '',
Configuration::getVersion(), Configuration::getVersion(),
PHP_OS_FAMILY, PHP_OS_FAMILY,
phpversion() ?: 'Unknown', phpversion() ?: 'Unknown'
implode(', @', $maintainers),
), ),
'labels' => 'Bridge-Broken', 'labels' => 'Bridge-Broken',
'assignee' => $maintainer[0], 'assignee' => $bridge->getMaintainer(),
]; ]));
return 'https://github.com/RSS-Bridge/rss-bridge/issues/new?' . http_build_query($query);
} }
private static function createGithubSearchUrl($bridge): string private static function createGithubSearchUrl($bridge): string

View File

@ -1,95 +0,0 @@
<?php
/**
* This action is used by the frontpage form search.
* It finds a bridge based off of a user input url.
* It uses bridges' detectParameters implementation.
*/
class FindfeedAction implements ActionInterface
{
    private BridgeFactory $bridgeFactory;

    public function __construct(
        BridgeFactory $bridgeFactory
    ) {
        $this->bridgeFactory = $bridgeFactory;
    }

    /**
     * Find every enabled bridge that recognises the requested url.
     *
     * Reads the `url` and `format` request parameters, passes the url to each
     * enabled bridge's detectParameters() and returns all matches as JSON.
     *
     * @param Request $request
     * @return Response 400 when `url` or `format` is missing,
     *                  404 with a JSON message when no bridge matched,
     *                  200 with a JSON list of matches otherwise
     */
    public function __invoke(Request $request): Response
    {
        $url = $request->get('url');
        $format = $request->get('format');

        if (!$url) {
            return new Response('You must specify a url', 400);
        }
        if (!$format) {
            return new Response('You must specify a format', 400);
        }

        $results = [];
        foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
            if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
                continue;
            }

            $bridge = $this->bridgeFactory->create($bridgeClassName);

            // null means the bridge does not recognise this url
            $bridgeParams = $bridge->detectParameters($url);
            if ($bridgeParams === null) {
                continue;
            }

            // It's allowed to have no 'context' in a bridge (only a default context without any name)
            // In this case, the reference to the parameters are found in the first element of the PARAMETERS array
            $context = $bridgeParams['context'] ?? 0;

            // Construct the array of parameters
            $bridgeData = [];
            foreach ($bridgeParams as $key => $value) {
                // 'context' is a special case: it is a bridge parameter without a "name" of its own.
                // Strict comparison is required: before PHP 8 an integer key 0 is loosely equal to 'context'.
                if ($key === 'context') {
                    $name = 'Context';
                } else {
                    $name = $this->getParameterName($bridge, $context, $key);
                }
                $bridgeData[$key] = [
                    'name' => $name,
                    'value' => $value,
                ];
            }

            $bridgeParams['bridge'] = $bridgeClassName;
            $bridgeParams['format'] = $format;

            $results[] = [
                'url' => './?action=display&' . http_build_query($bridgeParams),
                'bridgeParams' => $bridgeParams,
                'bridgeData' => $bridgeData,
                'bridgeMeta' => [
                    'name' => $bridge::NAME,
                    'description' => $bridge::DESCRIPTION,
                    'parameters' => $bridge::PARAMETERS,
                    'icon' => $bridge->getIcon(),
                ],
            ];
        }

        if ($results === []) {
            return new Response(Json::encode(['message' => 'No bridge found for given url']), 404, ['content-type' => 'application/json']);
        }
        return new Response(Json::encode($results), 200, ['content-type' => 'application/json']);
    }

    /**
     * Get the display name of a parameter.
     *
     * Looks the parameter up in the given context of the bridge's PARAMETERS
     * first, then in its 'global' section, and falls back to a placeholder text.
     *
     * @param object $bridge Bridge instance whose PARAMETERS constant is inspected
     * @param int|string $context Context name, or 0 for the unnamed default context
     * @param int|string $key Parameter key as returned by detectParameters()
     * @return string
     */
    private function getParameterName($bridge, $context, $key)
    {
        if (isset($bridge::PARAMETERS[$context][$key]['name'])) {
            return $bridge::PARAMETERS[$context][$key]['name'];
        }
        if (isset($bridge::PARAMETERS['global'][$key]['name'])) {
            return $bridge::PARAMETERS['global'][$key]['name'];
        }
        return 'Variable "' . $key . '" (No name provided)';
    }
}

View File

@ -2,48 +2,35 @@
final class FrontpageAction implements ActionInterface final class FrontpageAction implements ActionInterface
{ {
private BridgeFactory $bridgeFactory; public function execute(array $request)
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{ {
$token = $request->getAttribute('token'); $showInactive = (bool) ($request['show_inactive'] ?? null);
$messages = [];
$activeBridges = 0; $activeBridges = 0;
$bridgeClassNames = $this->bridgeFactory->getBridgeClassNames(); $bridgeFactory = new BridgeFactory();
$bridgeClassNames = $bridgeFactory->getBridgeClassNames();
foreach ($this->bridgeFactory->getMissingEnabledBridges() as $missingEnabledBridge) { $formatFactory = new FormatFactory();
$messages[] = [ $formats = $formatFactory->getFormatNames();
'body' => sprintf('Warning : Bridge "%s" not found', $missingEnabledBridge),
'level' => 'warning'
];
}
$body = ''; $body = '';
foreach ($bridgeClassNames as $bridgeClassName) { foreach ($bridgeClassNames as $bridgeClassName) {
if ($this->bridgeFactory->isEnabled($bridgeClassName)) { if ($bridgeFactory->isEnabled($bridgeClassName)) {
$body .= BridgeCard::render($this->bridgeFactory, $bridgeClassName, $token); $body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats);
$activeBridges++; $activeBridges++;
} elseif ($showInactive) {
$body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats, false) . PHP_EOL;
} }
} }
$response = new Response(render(__DIR__ . '/../templates/frontpage.html.php', [ return render(__DIR__ . '/../templates/frontpage.html.php', [
'messages' => $messages, 'messages' => [],
'admin_email' => Configuration::getConfig('admin', 'email'), 'admin_email' => Configuration::getConfig('admin', 'email'),
'admin_telegram' => Configuration::getConfig('admin', 'telegram'), 'admin_telegram' => Configuration::getConfig('admin', 'telegram'),
'bridges' => $body, 'bridges' => $body,
'active_bridges' => $activeBridges, 'active_bridges' => $activeBridges,
'total_bridges' => count($bridgeClassNames), 'total_bridges' => count($bridgeClassNames),
])); 'show_inactive' => $showInactive,
]);
// TODO: The rendered template could be cached, but beware config changes that changes the html
return $response;
} }
} }

View File

@ -4,7 +4,7 @@ declare(strict_types=1);
class HealthAction implements ActionInterface class HealthAction implements ActionInterface
{ {
public function __invoke(Request $request): Response public function execute(array $request)
{ {
$response = [ $response = [
'code' => 200, 'code' => 200,

View File

@ -1,26 +1,32 @@
<?php <?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
class ListAction implements ActionInterface class ListAction implements ActionInterface
{ {
private BridgeFactory $bridgeFactory; public function execute(array $request)
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{ {
$list = new \stdClass(); $list = new \stdClass();
$list->bridges = []; $list->bridges = [];
$list->total = 0; $list->total = 0;
foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { $bridgeFactory = new BridgeFactory();
$bridge = $this->bridgeFactory->create($bridgeClassName);
foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
$bridge = $bridgeFactory->create($bridgeClassName);
$list->bridges[$bridgeClassName] = [ $list->bridges[$bridgeClassName] = [
'status' => $this->bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive', 'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive',
'uri' => $bridge->getURI(), 'uri' => $bridge->getURI(),
'donationUri' => $bridge->getDonationURI(), 'donationUri' => $bridge->getDonationURI(),
'name' => $bridge->getName(), 'name' => $bridge->getName(),
@ -31,6 +37,6 @@ class ListAction implements ActionInterface
]; ];
} }
$list->total = count($list->bridges); $list->total = count($list->bridges);
return new Response(Json::encode($list), 200, ['content-type' => 'application/json']); return new Response(Json::encode($list), 200, ['Content-Type' => 'application/json']);
} }
} }

View File

@ -0,0 +1,50 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
class SetBridgeCacheAction implements ActionInterface
{
    /**
     * Store a request-supplied value in the cache scope of a bridge.
     *
     * Request parameters read: `key` (required), `bridge`, `value`.
     * The request is first passed through ApiAuthenticationMiddleware.
     *
     * @param array $request Request parameters
     * @throws \Exception with code 401 when the bridge is not enabled
     */
    public function execute(array $request)
    {
        $authenticationMiddleware = new ApiAuthenticationMiddleware();
        $authenticationMiddleware($request);

        // Read with ?? so a missing parameter reaches the explicit client error
        // instead of raising an undefined-index warning first
        $key = $request['key'] ?? null;
        if (!$key) {
            returnClientError('You must specify key!');
        }

        $bridgeFactory = new BridgeFactory();

        $bridgeClassName = $bridgeFactory->createBridgeClassName($request['bridge'] ?? '');

        // whitelist control
        if (!$bridgeFactory->isEnabled($bridgeClassName)) {
            throw new \Exception('This bridge is not whitelisted', 401);
        }

        $bridge = $bridgeFactory->create($bridgeClassName);
        $bridge->loadConfiguration();

        // A missing `value` is stored as null, as before, but without the warning
        $value = $request['value'] ?? null;

        $cache = RssBridge::getCache();
        $cache->setScope(get_class($bridge));
        if (!is_array($key)) {
            // not sure if $key is an array when it comes in from request
            $key = [$key];
        }
        $cache->setKey($key);
        $cache->saveData($value);

        header('Content-Type: text/plain');
        echo 'done';
    }
}

View File

@ -1,16 +0,0 @@
#!/usr/bin/env php
<?php
/**
 * Remove all items from the cache
 *
 * Command-line script: loads the bootstrap and configuration files, obtains
 * the cache service from the dependency container and calls clear() on it.
 */
require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';
// dependencies.php returns the container that is indexed by service name below
$container = require __DIR__ . '/../lib/dependencies.php';
/** @var CacheInterface $cache */
$cache = $container['cache'];
$cache->clear();

View File

@ -1,24 +0,0 @@
#!/usr/bin/env php
<?php
/**
 * Remove all expired items from the cache
 *
 * Command-line script: loads the bootstrap and configuration files, makes
 * sure purging is switched on when the file cache is in use, then calls
 * prune() on the cache service.
 */
require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';
$container = require __DIR__ . '/../lib/dependencies.php';
// Only the 'file' cache type is checked for the FileCache.enable_purge switch
if (
Configuration::getConfig('cache', 'type') === 'file'
&& !Configuration::getConfig('FileCache', 'enable_purge')
) {
// Override enable_purge for this particular execution
// NOTE(review): this runs after the container was loaded; it presumably relies on
// the cache service being created lazily when $container['cache'] is read - confirm
Configuration::setConfig('FileCache', 'enable_purge', true);
}
/** @var CacheInterface $cache */
$cache = $container['cache'];
$cache->prune();

View File

@ -1,20 +0,0 @@
#!/usr/bin/env php
<?php
/**
 * Add log records to all three levels (for testing purposes)
 *
 * Command-line script: loads the bootstrap and configuration files, obtains
 * the logger service from the dependency container and writes one debug,
 * one info and one error record.
 */
require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';
$container = require __DIR__ . '/../lib/dependencies.php';
/** @var Logger $logger */
$logger = $container['logger'];
// One record per level
$logger->debug('This is a test debug message');
$logger->info('This is a test info message');
$logger->error('This is a test error message');

View File

@ -31,17 +31,17 @@ class ABCNewsBridge extends BridgeAbstract
{ {
$url = sprintf('https://www.abc.net.au/news/%s', $this->getInput('topic')); $url = sprintf('https://www.abc.net.au/news/%s', $this->getInput('topic'));
$dom = getSimpleHTMLDOM($url); $dom = getSimpleHTMLDOM($url);
$dom = $dom->find('div[data-component="PaginationList"]', 0); $dom = $dom->find('div[data-component="CardList"]', 0);
if (!$dom) { if (!$dom) {
throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
} }
$dom = defaultLinkTo($dom, $this->getURI()); $dom = defaultLinkTo($dom, $this->getURI());
foreach ($dom->find('article[data-component="DetailCard"]') as $article) { foreach ($dom->find('div[data-component="GenericCard"]') as $article) {
$a = $article->find('a', 0); $a = $article->find('a', 0);
$this->items[] = [ $this->items[] = [
'title' => $a->plaintext, 'title' => $a->plaintext,
'uri' => $a->href, 'uri' => $a->href,
'content' => $article->find('p', 0)->plaintext, 'content' => $article->find('[data-component="CardDescription"]', 0)->plaintext,
'timestamp' => strtotime($article->find('time', 0)->datetime), 'timestamp' => strtotime($article->find('time', 0)->datetime),
]; ];
} }

View File

@ -12,29 +12,9 @@ class AO3Bridge extends BridgeAbstract
'url' => [ 'url' => [
'name' => 'url', 'name' => 'url',
'required' => true, 'required' => true,
// Example: F/F tag // Example: F/F tag, complete works only
'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works', 'exampleValue' => 'https://archiveofourown.org/works?work_search[complete]=T&tag_id=F*s*F',
], ],
'range' => [
'name' => 'Chapter Content',
'title' => 'Chapter(s) to include in each work\'s feed entry',
'defaultValue' => null,
'type' => 'list',
'values' => [
'None' => null,
'First' => 'first',
'Latest' => 'last',
'Entire work' => 'all',
],
],
'unique' => [
'name' => 'Make separate entries for new fic chapters',
'type' => 'checkbox',
'required' => false,
'title' => 'Make separate entries for new fic chapters',
'defaultValue' => 'checked',
],
'limit' => self::LIMIT,
], ],
'Bookmarks' => [ 'Bookmarks' => [
'user' => [ 'user' => [
@ -53,19 +33,23 @@ class AO3Bridge extends BridgeAbstract
], ],
] ]
]; ];
private $title;
public function collectData() public function collectData()
{ {
switch ($this->queriedContext) { switch ($this->queriedContext) {
case 'Bookmarks': case 'Bookmarks':
$this->collectList($this->getURI()); $user = $this->getInput('user');
$this->title = $user;
$url = self::URI
. '/users/' . $user
. '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
$this->collectList($url);
break; break;
case 'List': case 'List':
$this->collectList($this->getURI()); $this->collectList($this->getInput('url'));
break; break;
case 'Work': case 'Work':
$this->collectWork($this->getURI()); $this->collectWork($this->getInput('id'));
break; break;
} }
} }
@ -76,24 +60,9 @@ class AO3Bridge extends BridgeAbstract
*/ */
private function collectList($url) private function collectList($url)
{ {
$version = 'v0.0.1'; $html = getSimpleHTMLDOM($url);
$headers = [
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"
];
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI); $html = defaultLinkTo($html, self::URI);
// Get list title. Will include page range + count in some cases
$heading = ($html->find('#main h2', 0));
if ($heading->find('a.tag')) {
$heading = $heading->find('a.tag', 0);
}
$this->title = $heading->plaintext;
$limit = $this->getInput('limit') ?? 3;
$count = 0;
foreach ($html->find('.index.group > li') as $element) { foreach ($html->find('.index.group > li') as $element) {
$item = []; $item = [];
@ -102,70 +71,16 @@ class AO3Bridge extends BridgeAbstract
continue; // discard deleted works continue; // discard deleted works
} }
$item['title'] = $title->plaintext; $item['title'] = $title->plaintext;
$item['content'] = $element;
$item['uri'] = $title->href; $item['uri'] = $title->href;
$strdate = $element->find('div p.datetime', 0)->plaintext; $strdate = $element->find('div p.datetime', 0)->plaintext;
$item['timestamp'] = strtotime($strdate); $item['timestamp'] = strtotime($strdate);
// detach from rest of page because remove() is buggy
$element = str_get_html($element->outertext());
$tags = $element->find('ul.required-tags', 0);
foreach ($tags->childNodes() as $tag) {
$item['categories'][] = html_entity_decode($tag->plaintext);
}
$tags->remove();
$tags = $element->find('ul.tags', 0);
foreach ($tags->childNodes() as $tag) {
$item['categories'][] = html_entity_decode($tag->plaintext);
}
$tags->remove();
$item['content'] = implode('', $element->childNodes());
$chapters = $element->find('dl dd.chapters', 0); $chapters = $element->find('dl dd.chapters', 0);
// bookmarked series and external works do not have a chapters count // bookmarked series and external works do not have a chapters count
$chapters = (isset($chapters) ? $chapters->plaintext : 0); $chapters = (isset($chapters) ? $chapters->plaintext : 0);
if ($this->getInput('unique')) {
$item['uid'] = $item['uri'] . "/$strdate/$chapters"; $item['uid'] = $item['uri'] . "/$strdate/$chapters";
} else {
$item['uid'] = $item['uri'];
}
// Fetch workskin of desired chapter(s) in list
if ($this->getInput('range') && ($limit == 0 || $count++ < $limit)) {
$url = $item['uri'];
switch ($this->getInput('range')) {
case ('all'):
$url .= '?view_full_work=true';
break;
case ('first'):
break;
case ('last'):
// only way to get this is using the navigate page unfortunately
$url .= '/navigate';
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
$url = $html->find('ol.index.group > li > a', -1)->href;
break;
}
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
// remove duplicate fic summary
if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) {
$ficsum->remove();
}
$item['content'] .= $html->find('#workskin', 0);
}
// Use predictability of download links to generate enclosures
$wid = explode('/', $item['uri'])[4];
foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) {
$item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext;
}
$this->items[] = $item; $this->items[] = $item;
} }
@ -174,31 +89,20 @@ class AO3Bridge extends BridgeAbstract
/** /**
* Feed for recent chapters of a specific work. * Feed for recent chapters of a specific work.
*/ */
private function collectWork($url) private function collectWork($id)
{ {
$version = 'v0.0.1'; $url = self::URI . "/works/$id/navigate";
$headers = [ $response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']);
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)" $html = \str_get_html($response['body']);
];
$response = getContents($url . '/navigate', $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI); $html = defaultLinkTo($html, self::URI);
$response = getContents($url . '?view_full_work=true', $headers);
$workhtml = \str_get_html($response);
$workhtml = defaultLinkTo($workhtml, self::URI);
$this->title = $html->find('h2 a', 0)->plaintext; $this->title = $html->find('h2 a', 0)->plaintext;
$nav = $html->find('ol.index.group > li'); foreach ($html->find('ol.index.group > li') as $element) {
for ($i = 0; $i < count($nav); $i++) {
$item = []; $item = [];
$element = $nav[$i];
$item['title'] = $element->find('a', 0)->plaintext; $item['title'] = $element->find('a', 0)->plaintext;
$item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0); $item['content'] = $element;
$item['uri'] = $element->find('a', 0)->href; $item['uri'] = $element->find('a', 0)->href;
$strdate = $element->find('span.datetime', 0)->plaintext; $strdate = $element->find('span.datetime', 0)->plaintext;
@ -227,24 +131,4 @@ class AO3Bridge extends BridgeAbstract
{ {
return self::URI . '/favicon.ico'; return self::URI . '/favicon.ico';
} }
public function getURI()
{
    // Resolved first so it is available as the fallback for unknown contexts
    $fallback = parent::getURI();
    $context = $this->queriedContext;

    // Loose comparison on purpose: it mirrors the matching rules of a switch statement
    if ($context == 'Bookmarks') {
        // Bookmarks of a user, sorted by the date of the bookmarked item
        return self::URI
            . '/users/' . $this->getInput('user')
            . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
    }
    if ($context == 'List') {
        // The list url is supplied by the user as-is
        return $this->getInput('url');
    }
    if ($context == 'Work') {
        return self::URI . '/works/' . $this->getInput('id');
    }

    return $fallback;
}
} }

View File

@ -63,13 +63,11 @@ class ARDAudiothekBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$path = $this->getInput('path');
$limit = $this->getInput('limit');
$oldTz = date_default_timezone_get(); $oldTz = date_default_timezone_get();
date_default_timezone_set('Europe/Berlin'); date_default_timezone_set('Europe/Berlin');
$pathComponents = explode('/', $path); $pathComponents = explode('/', $this->getInput('path'));
if (empty($pathComponents)) { if (empty($pathComponents)) {
returnClientError('Path may not be empty'); returnClientError('Path may not be empty');
} }
@ -84,21 +82,17 @@ class ARDAudiothekBridge extends BridgeAbstract
} }
$url = self::APIENDPOINT . 'programsets/' . $showID . '/'; $url = self::APIENDPOINT . 'programsets/' . $showID . '/';
$json1 = getContents($url); $rawJSON = getContents($url);
$data1 = Json::decode($json1, false); $processedJSON = json_decode($rawJSON)->data->programSet;
$processedJSON = $data1->data->programSet;
if (!$processedJSON) {
throw new \Exception('Unable to find show id: ' . $showID);
}
$limit = $this->getInput('limit');
$answerLength = 1; $answerLength = 1;
$offset = 0; $offset = 0;
$numberOfElements = 1; $numberOfElements = 1;
while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) { while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) {
$json2 = getContents($url . '?offset=' . $offset); $rawJSON = getContents($url . '?offset=' . $offset);
$data2 = Json::decode($json2, false); $processedJSON = json_decode($rawJSON)->data->programSet;
$processedJSON = $data2->data->programSet;
$answerLength = count($processedJSON->items->nodes); $answerLength = count($processedJSON->items->nodes);
$offset = $offset + $answerLength; $offset = $offset + $answerLength;
@ -119,16 +113,7 @@ class ARDAudiothekBridge extends BridgeAbstract
$item['timestamp'] = $audio->publicationStartDateAndTime; $item['timestamp'] = $audio->publicationStartDateAndTime;
$item['uid'] = $audio->id; $item['uid'] = $audio->id;
$item['author'] = $audio->programSet->publicationService->title; $item['author'] = $audio->programSet->publicationService->title;
$item['categories'] = [ $audio->programSet->editorialCategories->title ];
$category = $audio->programSet->editorialCategories->title ?? null;
if ($category) {
$item['categories'] = [$category];
}
$item['itunes'] = [
'duration' => $audio->duration,
];
$this->items[] = $item; $this->items[] = $item;
} }
} }

View File

@ -40,11 +40,6 @@ class ARDMediathekBridge extends BridgeAbstract
* @const IMAGEWIDTHPLACEHOLDER * @const IMAGEWIDTHPLACEHOLDER
*/ */
const IMAGEWIDTHPLACEHOLDER = '{width}'; const IMAGEWIDTHPLACEHOLDER = '{width}';
/**
* Title of the current show
* @var string
*/
private $title;
const PARAMETERS = [ const PARAMETERS = [
[ [
@ -77,7 +72,7 @@ class ARDMediathekBridge extends BridgeAbstract
} }
} }
$url = self::APIENDPOINT . $showID . '?pageSize=' . self::PAGESIZE; $url = self::APIENDPOINT . $showID . '/?pageSize=' . self::PAGESIZE;
$rawJSON = getContents($url); $rawJSON = getContents($url);
$processedJSON = json_decode($rawJSON); $processedJSON = json_decode($rawJSON);
@ -98,17 +93,6 @@ class ARDMediathekBridge extends BridgeAbstract
$this->items[] = $item; $this->items[] = $item;
} }
$this->title = $processedJSON->title;
date_default_timezone_set($oldTz); date_default_timezone_set($oldTz);
} }
/** {@inheritdoc} */
public function getName()
{
    // Use the stored title when one is set, otherwise the parent's name
    return empty($this->title) ? parent::getName() : $this->title;
}
} }

View File

@ -20,14 +20,17 @@ class AcrimedBridge extends FeedExpander
public function collectData() public function collectData()
{ {
$url = 'https://www.acrimed.org/spip.php?page=backend'; $this->collectExpandableDatas(
$limit = $this->getInput('limit'); static::URI . 'spip.php?page=backend',
$this->collectExpandableDatas($url, $limit); $this->getInput('limit')
);
} }
protected function parseItem(array $item) protected function parseItem($newsItem)
{ {
$articlePage = getSimpleHTMLDOM($item['uri']); $item = parent::parseItem($newsItem);
$articlePage = getSimpleHTMLDOM($newsItem->link);
$article = sanitize($articlePage->find('article.article1', 0)->innertext); $article = sanitize($articlePage->find('article.article1', 0)->innertext);
$article = defaultLinkTo($article, static::URI); $article = defaultLinkTo($article, static::URI);
$item['content'] = $article; $item['content'] = $article;

View File

@ -1,45 +0,0 @@
<?php
/**
 * Bridge for the Activision Research blog.
 *
 * Reads the blog listing on the site's front page and loads every linked
 * article to use its body as the item content.
 */
class ActivisionResearchBridge extends BridgeAbstract
{
    const NAME = 'Activision Research Blog';
    const URI = 'https://research.activision.com';
    const DESCRIPTION = 'Posts from the Activision Research blog';
    const MAINTAINER = 'thefranke';
    const CACHE_TIMEOUT = 86400; // 24h

    /**
     * Collect one feed item per entry of the front page blog listing.
     *
     * @throws \Exception when the blog listing container is not found on the page
     */
    public function collectData()
    {
        // Kept in a variable so the error message below can name the page that was requested
        $url = static::URI;

        $dom = getSimpleHTMLDOM($url);
        $dom = $dom->find('div[id="home-blog-feed"]', 0);
        if (!$dom) {
            throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
        }
        $dom = defaultLinkTo($dom, $this->getURI());

        foreach ($dom->find('div[class="blog-entry"]') as $article) {
            $a = $article->find('a', 0);

            // The teaser image is only present as a CSS url(...) in the style attribute
            $blogimg = extractFromDelimiters($article->find('div[class="blog-img"]', 0)->style, 'url(', ')');

            $title = htmlspecialchars_decode($article->find('div[class="title"]', 0)->plaintext);
            $author = htmlspecialchars_decode($article->find('div[class="author"]', 0)->plaintext);
            $date = $article->find('div[class="pubdate"]', 0)->plaintext;

            // Second argument is CACHE_TIMEOUT * 7 * 4, i.e. 28 days in seconds
            // (presumably the cache lifetime of the article page)
            $entry = getSimpleHTMLDOMCached($a->href, static::CACHE_TIMEOUT * 7 * 4);
            $entry = defaultLinkTo($entry, $this->getURI());

            $content = $entry->find('div[class="blog-body"]', 0);

            // Strip active and interactive elements from the article body
            $tagsremove = ['script', 'iframe', 'input', 'form'];
            $content = sanitize($content, $tagsremove);

            // Prepend the teaser image; its path is appended to the site root
            $content = '<img src="' . static::URI . $blogimg . '" alt="">' . $content;

            $this->items[] = [
                'title' => $title,
                'author' => $author,
                'uri' => $a->href,
                'content' => $content,
                'timestamp' => strtotime($date),
            ];
        }
    }
}

View File

@ -32,7 +32,8 @@ class AirBreizhBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$html = ''; $html = '';
$html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme')); $html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme'))
or returnClientError('No results for this query.');
foreach ($html->find('article') as $article) { foreach ($html->find('article') as $article) {
$item = []; $item = [];

View File

@ -13,42 +13,26 @@ class AllegroBridge extends BridgeAbstract
'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660', 'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660',
'required' => true, 'required' => true,
], ],
'cookie' => [ 'sessioncookie' => [
'name' => 'The complete cookie value', 'name' => 'The \'wdctx\' session cookie',
'title' => 'Paste the value of the cookie value from your browser if you want to prevent Allegro imposing rate limits', 'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits',
'pattern' => '^.{250,};?$',
// phpcs:ignore
'exampleValue' => 'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd',
'required' => false, 'required' => false,
], ],
'includeSponsoredOffers' => [ 'includeSponsoredOffers' => [
'type' => 'checkbox', 'type' => 'checkbox',
'name' => 'Include Sponsored Offers', 'name' => 'Include Sponsored Offers'
'defaultValue' => 'checked'
],
'includePromotedOffers' => [
'type' => 'checkbox',
'name' => 'Include Promoted Offers',
'defaultValue' => 'checked'
] ]
]]; ]];
public function getName() public function getName()
{ {
$url = $this->getInput('url'); parse_str(parse_url($this->getInput('url'), PHP_URL_QUERY), $fields);
if (!$url) {
return parent::getName();
}
$parsedUrl = parse_url($url, PHP_URL_QUERY);
if (!$parsedUrl) {
return parent::getName();
}
parse_str($parsedUrl, $fields);
if (array_key_exists('string', $fields)) { if ($query = array_key_exists('string', $fields) ? urldecode($fields['string']) : false) {
$f = urldecode($fields['string']); return $query;
} else {
$f = false;
}
if ($f) {
return $f;
} }
return parent::getName(); return parent::getName();
@ -67,9 +51,9 @@ class AllegroBridge extends BridgeAbstract
$opts = []; $opts = [];
// If a cookie is provided // If a session cookie is provided
if ($cookie = $this->getInput('cookie')) { if ($sessioncookie = $this->getInput('sessioncookie')) {
$opts[CURLOPT_COOKIE] = $cookie; $opts[CURLOPT_COOKIE] = 'wdctx=' . $sessioncookie;
} }
$html = getSimpleHTMLDOM($url, [], $opts); $html = getSimpleHTMLDOM($url, [], $opts);
@ -79,57 +63,58 @@ class AllegroBridge extends BridgeAbstract
return; return;
} }
$results = $html->find('article[data-analytics-view-custom-context="REGULAR"]'); $results = $html->find('._6a66d_V7Lel article');
if ($this->getInput('includeSponsoredOffers')) { if (!$this->getInput('includeSponsoredOffers')) {
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]')); $results = array_filter($results, function ($node) {
} return $node->{'data-analytics-view-label'} != 'showSponsoredItems';
});
if ($this->getInput('includePromotedOffers')) {
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]'));
} }
foreach ($results as $post) { foreach ($results as $post) {
$item = []; $item = [];
$item['uri'] = $post->find('._6a66d_LX75-', 0)->href;
//TODO: port this over, whatever it does, from https://github.com/MK-PL/AllegroRSS
// if (arrayLinks.includes('events/clicks?')) {
// let sponsoredLink = new URL(arrayLinks).searchParams.get('redirect')
// arrayLinks = sponsoredLink.slice(0, sponsoredLink.indexOf('?'))
// }
$item['title'] = $post->find('._6a66d_LX75-', 0)->innertext;
$item['uid'] = $post->{'data-analytics-view-value'}; $item['uid'] = $post->{'data-analytics-view-value'};
$item_link = $post->find('a[href*="' . $item['uid'] . '"], a[href*="allegrolokalnie"]', 0);
$item['uri'] = $item_link->href;
$item['title'] = $item_link->find('img', 0)->alt;
$image = $item_link->find('img', 0)->{'data-src'} ?: $item_link->find('img', 0)->src ?? false;
if ($image) {
$item['enclosures'] = [$image . '#.image'];
}
$price = $post->{'data-analytics-view-json-custom-price'};
if ($price) {
$priceDecoded = json_decode(html_entity_decode($price));
$price = $priceDecoded->amount . ' ' . $priceDecoded->currency;
}
$descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/']; $descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/'];
$descriptionReplacements = ['<span>', ':</span> ', '<strong>', '&emsp;</strong> ']; $descriptionReplacements = ['<span>', ':</span> ', '<strong>', '&emsp;</strong> '];
$description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext; $description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext;
$descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description); $descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description);
$pricingExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) { $buyNowAuction = $post->find('.mqu1_g3.mvrt_0.mgn2_12', 0)->innertext ?? '';
$buyNowAuction = str_replace('</span><span', '</span> <span', $buyNowAuction);
$auctionTimeLeft = $post->find('._6a66d_ImOzU', 0)->innertext ?? '';
$price = $post->find('._6a66d_6R3iN', 0)->plaintext;
$price = empty($auctionTimeLeft) ? $price : $price . '- kwota licytacji';
$image = $post->find('._6a66d_44ioA img', 0)->{'data-src'} ?: $post->find('._6a66d_44ioA img', 0)->src ?? false;
if ($image) {
$item['enclosures'] = [$image . '#.image'];
}
$offerExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) {
return empty($node->find('.mvrt_0')); return empty($node->find('.mvrt_0'));
}); });
$pricingExtraInfo = $pricingExtraInfo[0]->plaintext ?? ''; $offerExtraInfo = $offerExtraInfo[0]->plaintext ?? '';
$offerExtraInfo = array_map(function ($node) { $isSmart = $post->find('._6a66d_TC2Zk', 0)->innertext ?? '';
return str_contains($node->plaintext, 'zapłać później') ? '' : $node->outertext; if (str_contains($isSmart, 'z kurierem')) {
}, $post->find('div.mpof_ki.mwdn_1.mj7a_4.mgn2_12')); $offerExtraInfo .= ', Smart z kurierem';
} else {
$isSmart = $post->find('img[alt="Smart!"]', 0) ?? false; $offerExtraInfo .= ', Smart';
if ($isSmart) {
$pricingExtraInfo .= $isSmart->outertext;
} }
$item['categories'] = []; $item['categories'] = [];
@ -146,9 +131,11 @@ class AllegroBridge extends BridgeAbstract
. '<div><strong>' . '<div><strong>'
. $price . $price
. '</strong></div><div>' . '</strong></div><div>'
. implode('</div><div>', $offerExtraInfo) . $auctionTimeLeft
. '</div><div>'
. $buyNowAuction
. '</div><dl>' . '</div><dl>'
. $pricingExtraInfo . $offerExtraInfo
. '</dl><hr>'; . '</dl><hr>';
$this->items[] = $item; $this->items[] = $item;

View File

@ -24,7 +24,6 @@ class AllocineFRSortiesBridge extends BridgeAbstract
$thumb = $element->find('figure.thumbnail', 0); $thumb = $element->find('figure.thumbnail', 0);
$meta = $element->find('div.meta-body', 0); $meta = $element->find('div.meta-body', 0);
$synopsis = $element->find('div.synopsis', 0); $synopsis = $element->find('div.synopsis', 0);
$date = $element->find('span.date', 0);
$title = $element->find('a[class*=meta-title-link]', 0); $title = $element->find('a[class*=meta-title-link]', 0);
$content = trim(defaultLinkTo($thumb->outertext . $meta->outertext . $synopsis->outertext, static::URI)); $content = trim(defaultLinkTo($thumb->outertext . $meta->outertext . $synopsis->outertext, static::URI));
@ -35,32 +34,8 @@ class AllocineFRSortiesBridge extends BridgeAbstract
$item['content'] = $content; $item['content'] = $content;
$item['title'] = trim($title->innertext); $item['title'] = trim($title->innertext);
$item['timestamp'] = $this->frenchPubDateToTimestamp($date->plaintext);
$item['uri'] = static::BASE_URI . '/' . substr($title->href, 1); $item['uri'] = static::BASE_URI . '/' . substr($title->href, 1);
$this->items[] = $item; $this->items[] = $item;
} }
} }
/**
 * Turn a date written with French month names into a Unix timestamp.
 *
 * The text is lower-cased, each French month name is swapped for an
 * English one, and the result is handed to strtotime().
 *
 * @param string $date Date text containing a French month name.
 * @return int|false Whatever strtotime() returns for the translated text.
 */
private function frenchPubDateToTimestamp($date)
{
    // French month name => English token accepted by strtotime()
    $monthsFrToEn = [
        'janvier' => 'jan',
        'février' => 'feb',
        'mars' => 'march',
        'avril' => 'apr',
        'mai' => 'may',
        'juin' => 'jun',
        'juillet' => 'jul',
        'août' => 'aug',
        'septembre' => 'sep',
        'octobre' => 'oct',
        'novembre' => 'nov',
        'décembre' => 'dec'
    ];
    $lowercased = strtolower($date);
    $translated = strtr($lowercased, $monthsFrToEn);
    return strtotime($translated);
}
} }

View File

@ -125,13 +125,14 @@ class AmazonPriceTrackerBridge extends BridgeAbstract
*/ */
private function getImage($html) private function getImage($html)
{ {
$image = 'https://placekitten.com/200/300';
$imageSrc = $html->find('#main-image-container img', 0); $imageSrc = $html->find('#main-image-container img', 0);
if ($imageSrc) { if ($imageSrc) {
$hiresImage = $imageSrc->getAttribute('data-old-hires'); $hiresImage = $imageSrc->getAttribute('data-old-hires');
$dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image'); $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image');
$image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute); $image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute);
} }
$image = $image ?: 'https://placekitten.com/200/300';
return <<<EOT return <<<EOT
<img width="300" style="max-width:300;max-height:300" src="$image" alt="{$this->title}" /> <img width="300" style="max-width:300;max-height:300" src="$image" alt="{$this->title}" />
@ -146,7 +147,7 @@ EOT;
{ {
$uri = $this->getURI(); $uri = $this->getURI();
return getSimpleHTMLDOM($uri); return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.');
} }
private function scrapePriceFromMetrics($html) private function scrapePriceFromMetrics($html)

View File

@ -1,278 +0,0 @@
<?php
class AnfrBridge extends BridgeAbstract
{
const NAME = 'ANFR';
const URI = 'https://data.anfr.fr/';
const DESCRIPTION = 'Fetches data from the French administration "Agence Nationale des Fréquences".';
const CACHE_TIMEOUT = 604800; // 7d
const MAINTAINER = 'quent1';
const PARAMETERS = [
'Données sur les réseaux mobiles' => [
'departement' => [
'name' => 'Département',
'type' => 'list',
'values' => [
'Tous' => null,
'Ain' => '001',
'Aisne' => '002',
'Allier' => '003',
'Alpes-de-Haute-Provence' => '004',
'Hautes-Alpes' => '005',
'Alpes-Maritimes' => '006',
'Ardèche' => '007',
'Ardennes' => '008',
'Ariège' => '009',
'Aube' => '010',
'Aude' => '011',
'Aveyron' => '012',
'Bouches-du-Rhône' => '013',
'Calvados' => '014',
'Cantal' => '015',
'Charente' => '016',
'Charente-Maritime' => '017',
'Cher' => '018',
'Corrèze' => '019',
'Corse-du-Sud' => '02A',
'Haute-Corse' => '02B',
'Côte-d\'Or' => '021',
'Côtes-d\'Armor' => '022',
'Creuse' => '023',
'Dordogne' => '024',
'Doubs' => '025',
'Drôme' => '026',
'Eure' => '027',
'Eure-et-Loir' => '028',
'Finistère' => '029',
'Gard' => '030',
'Haute-Garonne' => '031',
'Gers' => '032',
'Gironde' => '033',
'Hérault' => '034',
'Ille-et-Vilaine' => '035',
'Indre' => '036',
'Indre-et-Loire' => '037',
'Isère' => '038',
'Jura' => '039',
'Landes' => '040',
'Loir-et-Cher' => '041',
'Loire' => '042',
'Haute-Loire' => '043',
'Loire-Atlantique' => '044',
'Loiret' => '045',
'Lot' => '046',
'Lot-et-Garonne' => '047',
'Lozère' => '048',
'Maine-et-Loire' => '049',
'Manche' => '050',
'Marne' => '051',
'Haute-Marne' => '052',
'Mayenne' => '053',
'Meurthe-et-Moselle' => '054',
'Meuse' => '055',
'Morbihan' => '056',
'Moselle' => '057',
'Nièvre' => '058',
'Nord' => '059',
'Oise' => '060',
'Orne' => '061',
'Pas-de-Calais' => '062',
'Puy-de-Dôme' => '063',
'Pyrénées-Atlantiques' => '064',
'Hautes-Pyrénées' => '065',
'Pyrénées-Orientales' => '066',
'Bas-Rhin' => '067',
'Haut-Rhin' => '068',
'Rhône' => '069',
'Haute-Saône' => '070',
'Saône-et-Loire' => '071',
'Sarthe' => '072',
'Savoie' => '073',
'Haute-Savoie' => '074',
'Paris' => '075',
'Seine-Maritime' => '076',
'Seine-et-Marne' => '077',
'Yvelines' => '078',
'Deux-Sèvres' => '079',
'Somme' => '080',
'Tarn' => '081',
'Tarn-et-Garonne' => '082',
'Var' => '083',
'Vaucluse' => '084',
'Vendée' => '085',
'Vienne' => '086',
'Haute-Vienne' => '087',
'Vosges' => '088',
'Yonne' => '089',
'Territoire de Belfort' => '090',
'Essonne' => '091',
'Hauts-de-Seine' => '092',
'Seine-Saint-Denis' => '093',
'Val-de-Marne' => '094',
'Val-d\'Oise' => '095',
'Guadeloupe' => '971',
'Martinique' => '972',
'Guyane' => '973',
'La Réunion' => '974',
'Saint-Pierre-et-Miquelon' => '975',
'Mayotte' => '976',
'Saint-Barthélemy' => '977',
'Saint-Martin' => '978',
'Terres australes et antarctiques françaises' => '984',
'Wallis-et-Futuna' => '986',
'Polynésie française' => '987',
'Nouvelle-Calédonie' => '988',
'Île de Clipperton' => '989'
]
],
'generation' => [
'name' => 'Génération',
'type' => 'list',
'values' => [
'Tous' => null,
'2G' => '2G',
'3G' => '3G',
'4G' => '4G',
'5G' => '5G',
]
],
'operateur' => [
'name' => 'Opérateur',
'type' => 'list',
'values' => [
'Tous' => null,
'Bouygues Télécom' => 'BOUYGUES TELECOM',
'Dauphin Télécom' => 'DAUPHIN TELECOM',
'Digiciel' => 'DIGICEL',
'Free Caraïbes' => 'FREE CARAIBES',
'Free Mobile' => 'FREE MOBILE',
'GLOBALTEL' => 'GLOBALTEL',
'Office des postes et télécommunications de Nouvelle Calédonie' => 'Gouv Nelle Calédonie (OPT)',
'Maore Mobile' => 'MAORE MOBILE',
'ONATi' => 'ONATI',
'Orange' => 'ORANGE',
'Outremer Telecom' => 'OUTREMER TELECOM',
'Vodafone polynésie' => 'PMT/VODAPHONE',
'SFR' => 'SFR',
'SPM Télécom' => 'SPM TELECOM',
'Service des Postes et Télécommunications de Polynésie Française' => 'Gouv Nelle Calédonie (OPT)',
'SRR' => 'SRR',
'Station étrangère' => 'Station étrangère',
'Telco OI' => 'TELCO IO',
'United Telecommunication Services Caraïbes' => 'UTS Caraibes',
'Ora Mobile' => 'VITI SAS',
'Zeop' => 'ZEOP'
]
],
'statut' => [
'name' => 'Statut',
'type' => 'list',
'values' => [
'Tous' => null,
'En service' => 'En service',
'Projet approuvé' => 'Projet approuvé',
'Techniquement opérationnel' => 'Techniquement opérationnel',
]
]
]
];
/**
 * Download ANFR mobile-network records and publish one item per station.
 *
 * Records are grouped by their station number (sta_nm_anfr). Each station
 * keeps the list of its frequencies and the most recent date_maj seen, and
 * stations are emitted newest-update first.
 */
public function collectData()
{
    $query = [
        'id' => 'observatoire_2g_3g_4g',
        'resource_id' => '88ef0887-6b0f-4d3f-8545-6d64c8f597da',
        'fields' => 'id,adm_lb_nom,sta_nm_dpt,emr_lb_systeme,generation,date_maj,sta_nm_anfr,adr_lb_lieu,adr_lb_add1,adr_lb_add2,adr_lb_add3,adr_nm_cp,statut',
        'rows' => 10000
    ];

    // Bridge input => [API refine key, run the value through urlencode() first?]
    // Values that are pre-encoded end up percent-encoded once in the final
    // URL, because the urldecode() below only undoes http_build_query()'s pass.
    $refinements = [
        'departement' => ['refine.sta_nm_dpt', true],
        'generation' => ['refine.generation', false],
        'operateur' => ['refine.adm_lb_nom', true],
        'statut' => ['refine.statut', true],
    ];
    foreach ($refinements as $inputName => [$apiKey, $preEncode]) {
        $value = $this->getInput($inputName);
        if (empty($value)) {
            continue;
        }
        $query[$apiKey] = $preEncode ? urlencode($value) : $value;
    }

    // API seems to not play well with urlencoded data
    $path = '/d4c/api/records/1.0/download/?' . urldecode(http_build_query($query));
    $url = urljoin(static::URI, $path);
    $payload = Json::decode(getContents($url), false);

    $stations = [];
    foreach ($payload->records as $record) {
        $fields = $record->fields;
        $stationId = $fields->sta_nm_anfr;
        $updatedAt = strtotime($fields->date_maj);

        if (!isset($stations[$stationId])) {
            // First record for this station: create its entry.
            $addressLines = [
                $fields->adr_lb_add1 ?? '',
                $fields->adr_lb_add2 ?? '',
                $fields->adr_lb_add3 ?? '',
            ];
            $stations[$stationId] = [
                'id' => $stationId,
                'operator' => $fields->adm_lb_nom,
                'frequencies' => [],
                'lastUpdate' => 0,
                'address' => [
                    'street' => trim(implode(' ', $addressLines)),
                    'postCode' => $fields->adr_nm_cp,
                    'city' => $fields->adr_lb_lieu
                ]
            ];
        }

        $stations[$stationId]['frequencies'][] = [
            'generation' => $fields->generation,
            'frequency' => $fields->emr_lb_systeme,
            'status' => $fields->statut,
            'updatedAt' => $updatedAt,
        ];
        $stations[$stationId]['lastUpdate'] = max($stations[$stationId]['lastUpdate'], $updatedAt);
    }

    // Most recently updated stations first.
    usort($stations, static function ($a, $b) {
        return $b['lastUpdate'] <=> $a['lastUpdate'];
    });

    foreach ($stations as $station) {
        $address = $station['address'];

        $title = sprintf(
            '[%s] Mise à jour de la station n°%s à %s (%s)',
            $station['operator'],
            $station['id'],
            $address['city'],
            $address['postCode']
        );

        $frequencyList = '';
        foreach ($station['frequencies'] as $frequency) {
            $frequencyList .= sprintf('<li>%s : %s</li>', $frequency['frequency'], $frequency['status']);
        }

        $content = sprintf(
            '<h1>Adresse complète</h1><p>%s<br>%s<br>%s</p><h1>Fréquences</h1><p><ul>%s</ul></p>',
            $address['street'],
            $address['postCode'],
            $address['city'],
            $frequencyList
        );

        $this->items[] = [
            'uid' => $station['id'],
            'timestamp' => $station['lastUpdate'],
            'title' => $title,
            'content' => $content,
        ];
    }
}
}

View File

@ -1,87 +0,0 @@
<?php
class AnisearchBridge extends BridgeAbstract
{
const MAINTAINER = 'Tone866';
const NAME = 'Anisearch';
const URI = 'https://www.anisearch.de';
const CACHE_TIMEOUT = 1800; // 30min
const DESCRIPTION = 'Feed for Anisearch';
const PARAMETERS = [[
'category' => [
'name' => 'Dub',
'type' => 'list',
'values' => [
'DE'
=> 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=de&sort=date&order=desc&view=4',
'EN'
=> 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=en&sort=date&order=desc&view=4',
'JP'
=> 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=ja&sort=date&order=desc&view=4'
]
],
'trailers' => [
'name' => 'Trailers',
'type' => 'checkbox',
'title' => 'Will include trailes',
'defaultValue' => false
]
]];
/**
 * Collect the newest entries from the selected Anisearch index page.
 *
 * For every listed anime the detail page is fetched; its cover image and
 * description text become the item content. When the "trailers" option is
 * enabled, the first trailer found on the detail page is embedded as an
 * iframe.
 */
public function collectData()
{
    $baseurl = 'https://www.anisearch.de/';
    $trailers = $this->getInput('trailers');
    // Trailer mode fetches one more page per entry and uses the smaller limit.
    $limit = $trailers ? 5 : 10;

    $dom = getSimpleHTMLDOM($this->getInput('category'));

    foreach ($dom->find('li.btype0') as $key => $li) {
        if ($key >= $limit) {
            break;
        }

        $a = $li->find('a', 0);
        if (!$a) {
            // No link in this list entry, so there is no detail page to fetch.
            continue;
        }
        $title = $a->find('span.title', 0);
        $url = $baseurl . $a->href;

        // get article
        $domarticle = getSimpleHTMLDOM($url);
        $content = $domarticle->find('div.details-text', 0);

        // get header-image; when it is missing nothing is prepended to the content
        $headerimage = $domarticle->find('img#details-cover', 0);

        // Strip hidden elements from the description, if there is a description at all.
        if ($content) {
            foreach ($content->find('.hidden') as $element) {
                $element->remove();
            }
        }

        // get trailer
        $ytlink = '';
        if ($trailers) {
            $trailerlink = $domarticle->find('section#trailers > div > div.swiper > ul.swiper-wrapper > li.swiper-slide > a', 0);
            if ($trailerlink) {
                $trailersite = getSimpleHTMLDOM($baseurl . $trailerlink->href);
                $iframe = $trailersite->find('div#video > iframe', 0);
                // The player URL is read from data-xsrc; skip the embed when it is absent.
                $trailer = $iframe ? $iframe->{'data-xsrc'} : null;
                if ($trailer) {
                    $ytlink = <<<EOT
<br /><iframe width="560" height="315" src="$trailer" title="YouTube video player"
frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
EOT;
                }
            }
        }

        $this->items[] = [
            'title' => $title->plaintext,
            'uri' => $url,
            'content' => $headerimage . '<br />' . $content . $ytlink
        ];
    }
}
}

View File

@ -1,183 +0,0 @@
<?php
class AnnasArchiveBridge extends BridgeAbstract
{
const NAME = 'Anna\'s Archive';
const MAINTAINER = 'phantop';
const URI = 'https://annas-archive.org/';
const DESCRIPTION = 'Returns books from Anna\'s Archive';
const PARAMETERS = [
[
'q' => [
'name' => 'Query',
'exampleValue' => 'apothecary diaries',
'required' => true,
],
'ext' => [
'name' => 'Extension',
'type' => 'list',
'values' => [
'Any' => null,
'azw3' => 'azw3',
'cbr' => 'cbr',
'cbz' => 'cbz',
'djvu' => 'djvu',
'epub' => 'epub',
'fb2' => 'fb2',
'fb2.zip' => 'fb2.zip',
'mobi' => 'mobi',
'pdf' => 'pdf',
]
],
'lang' => [
'name' => 'Language',
'type' => 'list',
'values' => [
'Any' => null,
'Afrikaans [af]' => 'af',
'Arabic [ar]' => 'ar',
'Bangla [bn]' => 'bn',
'Belarusian [be]' => 'be',
'Bulgarian [bg]' => 'bg',
'Catalan [ca]' => 'ca',
'Chinese [zh]' => 'zh',
'Church Slavic [cu]' => 'cu',
'Croatian [hr]' => 'hr',
'Czech [cs]' => 'cs',
'Danish [da]' => 'da',
'Dongxiang [sce]' => 'sce',
'Dutch [nl]' => 'nl',
'English [en]' => 'en',
'French [fr]' => 'fr',
'German [de]' => 'de',
'Greek [el]' => 'el',
'Hebrew [he]' => 'he',
'Hindi [hi]' => 'hi',
'Hungarian [hu]' => 'hu',
'Indonesian [id]' => 'id',
'Irish [ga]' => 'ga',
'Italian [it]' => 'it',
'Japanese [ja]' => 'ja',
'Kazakh [kk]' => 'kk',
'Korean [ko]' => 'ko',
'Latin [la]' => 'la',
'Latvian [lv]' => 'lv',
'Lithuanian [lt]' => 'lt',
'Luxembourgish [lb]' => 'lb',
'Ndolo [ndl]' => 'ndl',
'Norwegian [no]' => 'no',
'Persian [fa]' => 'fa',
'Polish [pl]' => 'pl',
'Portuguese [pt]' => 'pt',
'Romanian [ro]' => 'ro',
'Russian [ru]' => 'ru',
'Serbian [sr]' => 'sr',
'Spanish [es]' => 'es',
'Swedish [sv]' => 'sv',
'Tamil [ta]' => 'ta',
'Traditional Chinese [zhHant]' => 'zhHant',
'Turkish [tr]' => 'tr',
'Ukrainian [uk]' => 'uk',
'Unknown language' => '_empty',
'Unknown language [und]' => 'und',
'Unknown language [urdu]' => 'urdu',
'Urdu [ur]' => 'ur',
'Vietnamese [vi]' => 'vi',
'Welsh [cy]' => 'cy',
]
],
'content' => [
'name' => 'Type',
'type' => 'list',
'values' => [
'Any' => null,
'Book (fiction)' => 'book_fiction',
'Book (nonfiction)' => 'book_nonfiction',
'Book (unknown)' => 'book_unknown',
'Comic book' => 'book_comic',
'Journal article' => 'journal_article',
'Magazine' => 'magazine',
'Standards document' => 'standards_document',
]
],
'src' => [
'name' => 'Source',
'type' => 'list',
'values' => [
'Any' => null,
'Internet Archive' => 'ia',
'Libgen.li' => 'lgli',
'Libgen.rs' => 'lgrs',
'SciHub' => 'scihub',
'ZLibrary' => 'zlib',
]
],
]
];
/**
 * Collect search results from Anna's Archive as feed items.
 *
 * Each exact-match result becomes one item. The result's own page is
 * fetched (cached for 20 days) to add the cover image, the summary and
 * the download links as enclosures.
 */
public function collectData()
{
    $url = $this->getURI();
    $list = getSimpleHTMLDOMCached($url);
    $list = defaultLinkTo($list, self::URI);

    // Don't attempt to do anything if not found message is given
    if ($list->find('.js-not-found-additional')) {
        return;
    }

    foreach ($list->find('.w-full > .mb-4 > div') as $element) {
        // stop adding entries once partial match list starts
        if (str_contains($element->innertext, 'partial match')) {
            break;
        }

        $link = $element->find('a', 0);
        if (!$link) {
            continue;
        }

        $item = [];
        $item['title'] = $link->find('h3', 0)->plaintext;
        $item['author'] = $link->find('div.italic', 0)->plaintext;
        $item['uri'] = $link->href;
        $item['content'] = $link->plaintext;
        $item['uid'] = $item['uri'];

        $item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20);
        if ($item_html) {
            $item_html = defaultLinkTo($item_html, self::URI);
            $item['content'] .= $item_html->find('main img', 0);
            $item['content'] .= $item_html->find('main .mt-4', 0); // Summary

            // Start from an empty list so array_diff() below always receives
            // an array, even when no download link qualifies.
            $downloads = [];
            foreach ($item_html->find('main ul.mb-4 > li > a.js-download-link') as $file) {
                if (!str_contains($file->href, 'fast_download')) {
                    $downloads[] = $file->href;
                }
            }
            // Remove bulk torrents from enclosures list; array_diff() keeps the
            // original keys, so reindex to get a plain list again.
            $item['enclosures'] = array_values(array_diff($downloads, [self::URI . 'datasets']));
        }

        $this->items[] = $item;
    }
}
/**
 * Feed name: the bridge name, followed by " - <query>" when a query is set.
 *
 * @return string
 */
public function getName()
{
    $query = $this->getInput('q');
    if ($query == null) {
        return parent::getName();
    }
    return parent::getName() . ' - ' . $query;
}
/**
 * Build the search URL, sorted by newest, from the inputs that were provided.
 *
 * @return string
 */
public function getURI()
{
    $query = [];
    // Only inputs with a truthy value are put into the query string.
    foreach (['q', 'ext', 'lang', 'src', 'content'] as $name) {
        $value = $this->getInput($name);
        if ($value) {
            $query[$name] = $value;
        }
    }
    return parent::getURI() . 'search?sort=newest&' . http_build_query($query);
}
}

View File

@ -18,45 +18,9 @@ class AppleMusicBridge extends BridgeAbstract
'required' => true, 'required' => true,
], ],
]]; ]];
const CACHE_TIMEOUT = 60 * 60 * 6; // 6 hours const CACHE_TIMEOUT = 21600; // 6 hours
private $title;
public function collectData() public function collectData()
{
$items = $this->getJson();
$artist = $this->getArtist($items);
$this->title = $artist->artistName;
foreach ($items as $item) {
if ($item->wrapperType === 'collection') {
$copyright = $item->copyright ?? '';
$artworkUrl500 = str_replace('/100x100', '/500x500', $item->artworkUrl100);
$artworkUrl2000 = str_replace('/100x100', '/2000x2000', $item->artworkUrl100);
$escapedCollectionName = htmlspecialchars($item->collectionName);
$this->items[] = [
'title' => $item->collectionName,
'uri' => $item->collectionViewUrl,
'timestamp' => $item->releaseDate,
'enclosures' => $artworkUrl500,
'author' => $item->artistName,
'content' => "<figure>
<img srcset=\"$item->artworkUrl60 60w, $item->artworkUrl100 100w, $artworkUrl500 500w, $artworkUrl2000 2000w\"
sizes=\"100%\" src=\"$artworkUrl2000\"
alt=\"Cover of $escapedCollectionName\"
style=\"display: block; margin: 0 auto;\" />
<figcaption>
from <a href=\"$artist->artistLinkUrl\">$item->artistName</a><br />$copyright
</figcaption>
</figure>",
];
}
}
}
private function getJson()
{ {
# Limit the amount of releases to 50 # Limit the amount of releases to 50
if ($this->getInput('limit') > 50) { if ($this->getInput('limit') > 50) {
@ -65,53 +29,29 @@ class AppleMusicBridge extends BridgeAbstract
$limit = $this->getInput('limit'); $limit = $this->getInput('limit');
} }
$url = 'https://itunes.apple.com/lookup?id=' . $this->getInput('artist') . '&entity=album&limit=' . $limit . '&sort=recent'; $url = 'https://itunes.apple.com/lookup?id='
. $this->getInput('artist')
. '&entity=album&limit='
. $limit .
'&sort=recent';
$html = getSimpleHTMLDOM($url); $html = getSimpleHTMLDOM($url);
$json = json_decode($html); $json = json_decode($html);
$result = $json->results;
if (!is_array($result) || count($result) == 0) { foreach ($json->results as $obj) {
returnServerError('There is no artist with id "' . $this->getInput('artist') . '".'); if ($obj->wrapperType === 'collection') {
$this->items[] = [
'title' => $obj->artistName . ' - ' . $obj->collectionName,
'uri' => $obj->collectionViewUrl,
'timestamp' => $obj->releaseDate,
'enclosures' => $obj->artworkUrl100,
'content' => '<a href=' . $obj->collectionViewUrl
. '><img src="' . $obj->artworkUrl100 . '" /></a><br><br>'
. $obj->artistName . ' - ' . $obj->collectionName
. '<br>'
. $obj->copyright,
];
} }
return $result;
} }
/**
 * Pick the artist record out of a list of lookup results.
 *
 * @param array $json Result objects, each carrying a wrapperType property.
 * @return object|string The single record whose wrapperType is 'artist';
 *                       otherwise the bridge's default name.
 */
private function getArtist($json)
{
    // array_filter() preserves the original keys, so reindex before taking
    // the first element; otherwise the lookup only works when the artist
    // record happens to sit at index 0 of the input.
    $artists = array_values(array_filter($json, function ($obj) {
        return $obj->wrapperType == 'artist';
    }));
    if (count($artists) === 1) {
        return $artists[0];
    }
    // NOTE(review): this fallback is a string, while collectData() reads
    // ->artistName and ->artistLinkUrl from the result — confirm intent.
    return parent::getName();
}
/**
 * Feed name: the stored title when one has been set, else the bridge name.
 *
 * @return string
 */
public function getName()
{
    return $this->title ?? parent::getName();
}
public function getIcon()
{
if (empty($this->getInput('artist'))) {
return parent::getIcon();
}
// it isn't necessary to set the correct artist name into the url
$url = 'https://music.apple.com/us/artist/jon-bellion/' . $this->getInput('artist');
$html = getSimpleHTMLDOMCached($url);
$image = $html->find('meta[property="og:image"]', 0)->content;
$imageUpdatedSize = preg_replace('/\/\d*x\d*cw/i', '/144x144-999', $image);
return $imageUpdatedSize;
} }
} }

View File

@ -1,118 +0,0 @@
<?php
class ArsTechnicaBridge extends FeedExpander
{
const MAINTAINER = 'phantop';
const NAME = 'Ars Technica';
const URI = 'https://arstechnica.com/';
const DESCRIPTION = 'Returns the latest articles from Ars Technica';
const PARAMETERS = [[
'section' => [
'name' => 'Site section',
'type' => 'list',
'defaultValue' => 'index',
'values' => [
'All' => 'index',
'Apple' => 'apple',
'Board Games' => 'cardboard',
'Cars' => 'cars',
'Features' => 'features',
'Gaming' => 'gaming',
'Information Technology' => 'technology-lab',
'Science' => 'science',
'Staff Blogs' => 'staff-blogs',
'Tech Policy' => 'tech-policy',
'Tech' => 'gadgets',
]
]
]];
/**
 * Load the feed of the selected site section, passing 10 as the item limit.
 */
public function collectData()
{
    $this->collectExpandableDatas(
        'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section'),
        10
    );
}
/**
 * Rebuild an item's content from the article page behind its 'uri'.
 *
 * The page's header lead and intro image plus every .post-content section
 * are joined, cleaned up (lightboxes flattened, interludes and table of
 * contents removed, iframes turned into links) and stored as 'content'.
 * 'categories' and 'uid' are taken from the page's parsely-page metadata
 * when it is present.
 *
 * @param array $item Item to enrich; presumably the entry parsed from the feed.
 * @return array The enriched item. When no article markup could be scraped,
 *               the incoming 'content' is kept.
 */
protected function parseItem(array $item)
{
    $item_html = getSimpleHTMLDOMCached($item['uri']);
    $item_html = defaultLinkTo($item_html, self::URI);

    $content = '';
    $header = $item_html->find('article header', 0);
    // find(..., 0) yields null when nothing matches; without a header there
    // is neither a lead paragraph nor an intro image to copy.
    if ($header != null) {
        $leading = $header->find('p[class*=leading]', 0);
        if ($leading != null) {
            $content .= '<p>' . $leading->innertext . '</p>';
        }
        $intro_image = $header->find('img.intro-image', 0);
        if ($intro_image != null) {
            $content .= '<figure>' . $intro_image;
            $image_caption = $header->find('.caption .caption-content', 0);
            if ($image_caption != null) {
                $content .= '<figcaption>' . $image_caption->innertext . '</figcaption>';
            }
            $content .= '</figure>';
        }
    }

    foreach ($item_html->find('.post-content') as $content_tag) {
        $content .= $content_tag->innertext;
    }

    // Tags and post id come from a JSON blob in the parsely-page element;
    // only use them when the element exists and decodes to an array.
    $parsely = $item_html->find('[name="parsely-page"]', 0);
    $parsely_json = $parsely != null
        ? json_decode(html_entity_decode($parsely->content), true)
        : null;
    if (is_array($parsely_json)) {
        if (isset($parsely_json['tags'])) {
            $item['categories'] = $parsely_json['tags'];
        }
        if (isset($parsely_json['post_id'])) {
            $item['uid'] = strval($parsely_json['post_id']);
        }
    }

    // Parsing can fail (e.g. nothing was scraped); the DOM calls below
    // would then be made on a non-object, so keep the incoming content.
    $article = $content === '' ? false : str_get_html($content);
    if (!$article) {
        return $item;
    }

    // Some lightboxes are nested in figures. I'd guess that's a
    // bug in the website
    foreach ($article->find('figure div div.ars-lightbox') as $weird_lightbox) {
        $weird_lightbox->parent->parent->outertext = $weird_lightbox;
    }

    // It's easier to reconstruct the whole thing than remove
    // duplicate reactive tags
    foreach ($article->find('.ars-lightbox') as $lightbox) {
        $lightbox_content = '';
        foreach ($lightbox->find('.ars-lightbox-item') as $lightbox_item) {
            $img = $lightbox_item->find('img', 0);
            if ($img != null) {
                $lightbox_content .= '<figure>' . $img;
                $caption = $lightbox_item->find('div.pswp-caption-content', 0);
                if ($caption != null) {
                    $credit = $lightbox_item->find('div.ars-gallery-caption-credit', 0);
                    if ($credit != null) {
                        $credit->innertext = 'Credit: ' . $credit->innertext;
                    }
                    $lightbox_content .= '<figcaption>' . $caption->innertext . '</figcaption>';
                }
                $lightbox_content .= '</figure>';
            }
        }
        $lightbox->innertext = $lightbox_content;
    }

    // remove various ars advertising
    foreach ($article->find('.ars-interlude-container') as $ad) {
        $ad->remove();
    }
    foreach ($article->find('.toc-container') as $toc) {
        $toc->remove();
    }

    // Mostly YouTube videos
    foreach ($article->find('iframe') as $iframe) {
        $iframe->outertext = '<a href="' . $iframe->src . '">' . $iframe->src . '</a>';
    }
    // This fixed padding around the former iframes and actual inline videos
    foreach ($article->find('div[style*=aspect-ratio]') as $styled) {
        $styled->removeAttribute('style');
    }

    $item['content'] = backgroundToImg($article);
    return $item;
}
}

View File

@ -156,10 +156,6 @@ class Arte7Bridge extends BridgeAbstract
. $element['mainImage']['url'] . $element['mainImage']['url']
. '" /></a>'; . '" /></a>';
$item['itunes'] = [
'duration' => $durationSeconds,
];
$this->items[] = $item; $this->items[] = $item;
} }
} }

View File

@ -45,6 +45,7 @@ class AsahiShimbunAJWBridge extends BridgeAbstract
foreach ($html->find('#MainInner li a') as $element) { foreach ($html->find('#MainInner li a') as $element) {
if ($element->parent()->class == 'HeadlineTopImage-S') { if ($element->parent()->class == 'HeadlineTopImage-S') {
Debug::log('Skip Headline, it is repeated below');
continue; continue;
} }
$item = []; $item = [];

View File

@ -37,8 +37,7 @@ class AskfmBridge extends BridgeAbstract
$item['timestamp'] = strtotime($element->find('time', 0)->datetime); $item['timestamp'] = strtotime($element->find('time', 0)->datetime);
$var = $element->find('div.streamItem_content', 0); $answer = trim($element->find('div.streamItem_content', 0)->innertext);
$answer = trim($var->innertext ?? '');
// This probably should be cleaned up, especially for YouTube embeds // This probably should be cleaned up, especially for YouTube embeds
if ($visual = $element->find('div.streamItem_visual', 0)) { if ($visual = $element->find('div.streamItem_visual', 0)) {

View File

@ -105,7 +105,8 @@ class AssociatedPressNewsBridge extends BridgeAbstract
private function collectCardData() private function collectCardData()
{ {
$json = getContents($this->getTagURI()); $json = getContents($this->getTagURI())
or returnServerError('Could not request: ' . $this->getTagURI());
$tagContents = json_decode($json, true); $tagContents = json_decode($json, true);

View File

@ -30,9 +30,6 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
// this bridge is broken and unmaintained
return;
$uri = self::URI . '/monair/commune/' . $this->getInput('cities'); $uri = self::URI . '/monair/commune/' . $this->getInput('cities');
$html = getSimpleHTMLDOM($uri); $html = getSimpleHTMLDOM($uri);

View File

@ -1,344 +0,0 @@
<?php
class AuctionetBridge extends BridgeAbstract
{
const NAME = 'Auctionet';
const URI = 'https://www.auctionet.com';
const DESCRIPTION = 'Fetches info about auction objects from Auctionet (an auction platform for many European auction houses)';
const MAINTAINER = 'Qluxzz';
const PARAMETERS = [[
'category' => [
'name' => 'Category',
'type' => 'list',
'values' => [
'All categories' => '',
'Art' => [
'All' => '25-art',
'Drawings' => '119-drawings',
'Engravings & Prints' => '27-engravings-prints',
'Other' => '30-other',
'Paintings' => '28-paintings',
'Photography' => '26-photography',
'Sculptures & Bronzes' => '29-sculptures-bronzes',
],
'Asiatica' => [
'All' => '117-asiatica',
],
'Books, Maps & Manuscripts' => [
'All' => '50-books-maps-manuscripts',
'Autographs & Manuscripts' => '206-autographs-manuscripts',
'Books' => '204-books',
'Maps' => '205-maps',
'Other' => '207-other',
],
'Carpets & Textiles' => [
'All' => '35-carpets-textiles',
'Carpets' => '36-carpets',
'Textiles' => '37-textiles',
],
'Ceramics & Porcelain' => [
'All' => '9-ceramics-porcelain',
'European' => '10-european',
'Oriental' => '11-oriental',
'Rest of the world' => '12-rest-of-the-world',
'Tableware' => '210-tableware',
],
'Clocks & Watches' => [
'All' => '31-clocks-watches',
'Carriage & Miniature Clocks' => '258-carriage-miniature-clocks',
'Longcase clocks' => '32-longcase-clocks',
'Mantel clocks' => '33-mantel-clocks',
'Other clocks' => '34-other-clocks',
'Pocket & Stop Watches' => '110-pocket-stop-watches',
'Wall Clocks' => '127-wall-clocks',
'Wristwatches' => '15-wristwatches',
],
'Coins, Medals & Stamps' => [
'All' => '46-coins-medals-stamps',
'Coins' => '128-coins',
'Orders & Medals' => '135-orders-medals',
'Other' => '131-other',
'Stamps' => '136-stamps',
],
'Folk art' => [
'All' => '58-folk-art',
'Bowls & Boxes' => '121-bowls-boxes',
'Furniture' => '122-furniture',
'Other' => '123-other',
'Tools & Gears' => '120-tools-gears',
],
'Furniture' => [
'All' => '16-furniture',
'Armchairs & Chairs' => '18-armchairs-chairs',
'Chests of drawers' => '24-chests-of-drawers',
'Cupboards, Cabinets & Shelves' => '23-cupboards-cabinets-shelves',
'Dining room furniture' => '22-dining-room-furniture',
'Garden' => '21-garden',
'Other' => '17-other',
'Sofas & seatings' => '20-sofas-seatings',
'Tables' => '19-tables',
],
'Glass' => [
'All' => '6-glass',
'Art glass' => '208-art-glass',
'Other' => '8-other',
'Tableware' => '7-tableware',
'Utility glass' => '209-utility-glass',
],
'Jewellery & Gemstones' => [
'All' => '13-jewellery-gemstones',
'Alliance rings' => '113-alliance-rings',
'Bracelets' => '106-bracelets',
'Brooches & Pendants' => '107-brooches-pendants',
'Costume Jewellery' => '259-costume-jewellery',
'Cufflinks & Tie Pins' => '111-cufflinks-tie-pins',
'Ear studs' => '116-ear-studs',
'Earrings' => '115-earrings',
'Gemstones' => '48-gemstones',
'Jewellery' => '14-jewellery',
'Jewellery Suites' => '109-jewellery-suites',
'Necklace' => '104-necklace',
'Other' => '118-other',
'Rings' => '112-rings',
'Signet rings' => '105-signet-rings',
'Solitaire rings' => '114-solitaire-rings',
],
'Licence weapons' => [
'All' => '59-licence-weapons',
'Combi/Combo' => '63-combi-combo',
'Double express rifles' => '60-double-express-rifles',
'Rifles' => '61-rifles',
'Shotguns' => '62-shotguns',
],
'Lighting & Lamps' => [
'All' => '1-lighting-lamps',
'Candlesticks' => '4-candlesticks',
'Ceiling lights' => '3-ceiling-lights',
'Chandeliers' => '203-chandeliers',
'Floor lights' => '2-floor-lights',
'Other lighting' => '5-other-lighting',
'Table Lamps' => '125-table-lamps',
'Wall Lights' => '124-wall-lights',
],
'Mirrors' => [
'All' => '42-mirrors',
],
'Miscellaneous' => [
'All' => '43-miscellaneous',
'Fishing equipment' => '54-fishing-equipment',
'Miscellaneous' => '47-miscellaneous',
'Modern Tools' => '133-modern-tools',
'Modern consumer electronics' => '52-modern-consumer-electronics',
'Musical instruments' => '51-musical-instruments',
'Technica & Nautica' => '45-technica-nautica',
],
'Photo, Cameras & Lenses' => [
'All' => '57-photo-cameras-lenses',
'Cameras & accessories' => '71-cameras-accessories',
'Optics' => '66-optics',
'Other' => '72-other',
],
'Silver & Metals' => [
'All' => '38-silver-metals',
'Other metals' => '40-other-metals',
'Pewter, Brass & Copper' => '41-pewter-brass-copper',
'Silver' => '39-silver',
'Silver plated' => '213-silver-plated',
],
'Toys' => [
'All' => '44-toys',
'Comics' => '211-comics',
'Toys' => '212-toys',
],
'Tribal art' => [
'All' => '134-tribal-art',
],
'Vehicles, Boats & Parts' => [
'All' => '249-vehicles-boats-parts',
'Automobilia & Transport' => '255-automobilia-transport',
'Bicycles' => '132-bicycles',
'Boats & Accessories' => '250-boats-accessories',
'Car parts' => '253-car-parts',
'Cars' => '215-cars',
'Moped parts' => '254-moped-parts',
'Mopeds' => '216-mopeds',
'Motorcycle parts' => '252-motorcycle-parts',
'Motorcycles' => '251-motorcycles',
'Other' => '256-other',
],
'Vintage & Designer Fashion' => [
'All' => '49-vintage-designer-fashion',
],
'Weapons & Militaria' => [
'All' => '137-weapons-militaria',
'Airguns' => '257-airguns',
'Armour & Uniform' => '138-armour-uniform',
'Edged weapons' => '130-edged-weapons',
'Guns & Rifles' => '129-guns-rifles',
'Other' => '214-other',
],
'Wine, Port & Spirits' => [
'All' => '170-wine-port-spirits',
],
]
],
'sort_order' => [
'name' => 'Sort order',
'type' => 'list',
'values' => [
'Most bids' => 'bids_count_desc',
'Lowest bid' => 'bid_asc',
'Highest bid' => 'bid_desc',
'Last bid on' => 'bid_on',
'Ending soonest' => 'end_asc_active',
'Lowest estimate' => 'estimate_asc',
'Highest estimate' => 'estimate_desc',
'Recently added' => 'recent'
],
],
'country' => [
'name' => 'Country',
'type' => 'list',
'values' => [
'All' => '',
'Denmark' => 'DK',
'Finland' => 'FI',
'Germany' => 'DE',
'Spain' => 'ES',
'Sweden' => 'SE',
'United Kingdom' => 'GB'
]
],
'language' => [
'name' => 'Language',
'type' => 'list',
'values' => [
'English' => 'en',
'Español' => 'es',
'Deutsch' => 'de',
'Svenska' => 'sv',
'Dansk' => 'da',
'Suomi' => 'fi',
],
],
]];
const CACHE_TIMEOUT = 3600; // 1 hour
private $title;
/**
 * Collect the auctions of the first ten search result pages.
 *
 * Each page contains 48 auctions, so reading ten pages decreases the
 * likelihood of missing auctions between feed refreshes.
 */
public function collectData()
{
    for ($page = 1; $page <= 10; $page++) {
        $data = getContents($this->getUrl($page));

        if ($page === 1) {
            // Must be stored on the instance: getName() reads $this->title
            $this->title = $this->getDocumentTitle($data);
        }

        $this->items = array_merge($this->items, $this->parsePageData($data));
    }
}

/**
 * Feed name: the title of the first fetched page, or the parent's name
 * when no title has been stored (yet).
 */
public function getName()
{
    return $this->title ?: parent::getName();
}

/* HELPERS */

/**
 * Build the search URL for one result page from the bridge inputs.
 *
 * @param int $page Result page number, sent as the `page` query parameter
 * @return string
 */
private function getUrl($page)
{
    $url = self::URI . '/' . $this->getInput('language') . '/search';

    $category = $this->getInput('category');
    if ($category) {
        $url .= '/' . $category;
    }

    // 'page' is always present, so the query string is never empty
    $query = ['page' => $page];

    $sort_order = $this->getInput('sort_order');
    if ($sort_order) {
        $query['order'] = $sort_order;
    }

    $country = $this->getInput('country');
    if ($country) {
        $query['country_code'] = $country;
    }

    return $url . '?' . http_build_query($query);
}

/**
 * Extract the text between <title> and </title> from a raw HTML string.
 *
 * @param string $data Raw HTML document
 * @return string Decoded, trimmed title; empty string when no complete title element is found
 */
private function getDocumentTitle($data)
{
    $title_elem = '<title>';

    $title_start = strpos($data, $title_elem);
    if ($title_start === false) {
        return '';
    }
    // Move past the opening tag so the offset points at the title text itself
    $title_start += strlen($title_elem);

    $title_end = strpos($data, '</title>', $title_start);
    if ($title_end === false) {
        return '';
    }

    $title = substr($data, $title_start, $title_end - $title_start);

    // The title is taken from HTML source, so entities are still encoded
    return trim(html_entity_decode($title, ENT_QUOTES, 'UTF-8'));
}
/**
 * The auction items data is included in the HTML document
 * as a HTML entities encoded JSON structure
 * which is used to hydrate the React component for the list of auctions
 *
 * @param string $data Raw HTML of one search result page
 * @return array Feed items; empty when the page carries no usable item data
 */
private function parsePageData($data)
{
    $key = 'data-react-props="';

    $start = strpos($data, $key);
    if ($start === false) {
        return [];
    }
    $start += strlen($key);

    // The JSON is entity-encoded, so the next double quote closes the attribute
    $end = strpos($data, '"', $start);
    if ($end === false) {
        return [];
    }

    $jsonString = substr($data, $start, $end - $start);
    $jsonData = json_decode(htmlspecialchars_decode($jsonString), false);

    if (!is_object($jsonData) || !isset($jsonData->items) || !is_array($jsonData->items)) {
        return [];
    }

    $items = [];
    foreach ($jsonData->items as $item) {
        $title = $item->longTitle;
        $images = $item->imageUrls ?? [];

        // First image is embedded in the content, the remaining ones become enclosures
        $content = count($images) > 0
            ? "<img src='" . $images[0] . "'/><br/>" . $title
            : $title;

        $items[] = [
            'title' => $title,
            'uri' => self::URI . $item->url,
            'uid' => $item->auctionId,
            'content' => $content,
            'enclosures' => array_slice($images, 1),
        ];
    }

    return $items;
}
}

View File

@ -13,20 +13,12 @@ class AutoJMBridge extends BridgeAbstract
'type' => 'text', 'type' => 'text',
'required' => true, 'required' => true,
'title' => 'URL d\'une recherche avec filtre de véhicules sans le http://www.autojm.fr/', 'title' => 'URL d\'une recherche avec filtre de véhicules sans le http://www.autojm.fr/',
'exampleValue' => 'recherche?brands[]=PEUGEOT&ranges[]=PEUGEOT 308' 'exampleValue' => 'recherche?brands[]=peugeot&ranges[]=peugeot-nouvelle-308-2021-5p'
], ],
] ]
]; ];
const CACHE_TIMEOUT = 3600; const CACHE_TIMEOUT = 3600;
const TEST_DETECT_PARAMETERS = [
'https://www.autojm.fr/recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308'
=> ['url' => 'recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308',
'context' => 'Afficher les offres de véhicules disponible sur la recheche AutoJM'
]
];
public function getIcon() public function getIcon()
{ {
return self::URI . 'favicon.ico'; return self::URI . 'favicon.ico';
@ -43,17 +35,6 @@ class AutoJMBridge extends BridgeAbstract
} }
} }
public function getURI()
{
switch ($this->queriedContext) {
case 'Afficher les offres de véhicules disponible sur la recheche AutoJM':
return self::URI . $this->getInput('url');
break;
default:
return self::URI;
}
}
public function collectData() public function collectData()
{ {
// Get the number of result for this search // Get the number of result for this search
@ -71,7 +52,7 @@ class AutoJMBridge extends BridgeAbstract
$data = json_decode($json); $data = json_decode($json);
$nb_results = $data->nbResults; $nb_results = $data->nbResults;
$total_pages = ceil($nb_results / 14); $total_pages = ceil($nb_results / 15);
// Limit the number of page to analyse to 10 // Limit the number of page to analyse to 10
for ($page = 1; $page <= $total_pages && $page <= 10; $page++) { for ($page = 1; $page <= $total_pages && $page <= 10; $page++) {
@ -85,8 +66,8 @@ class AutoJMBridge extends BridgeAbstract
$image = $car->find('div[class=card-car__header__img]', 0)->find('img', 0)->src; $image = $car->find('div[class=card-car__header__img]', 0)->find('img', 0)->src;
// Decode HTML attribute JSON data // Decode HTML attribute JSON data
$car_data = json_decode(html_entity_decode($car->{'data-layer'})); $car_data = json_decode(html_entity_decode($car->{'data-layer'}));
$car_model = $car_data->title; $car_model = $car->{'data-title'} . ' ' . $car->{'data-suptitle'};
$availability = $car->find('div[class*=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext; $availability = $car->find('div[class=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext;
$warranty = $car->find('div[data-type=WarrantyCard]', 0)->plaintext; $warranty = $car->find('div[data-type=WarrantyCard]', 0)->plaintext;
$discount_html = $car->find('div[class=subtext vehicle_reference_element]', 0); $discount_html = $car->find('div[class=subtext vehicle_reference_element]', 0);
// Check if there is any discount info displayed // Check if there is any discount info displayed
@ -151,18 +132,4 @@ class AutoJMBridge extends BridgeAbstract
return $html; return $html;
} }
public function detectParameters($url)
{
$params = [];
$regex = '/^(https?:\/\/)?(www\.|)autojm.fr\/(recherche\?.*|recherche\/[0-9]{1,10}\?.*)$/m';
if (preg_match($regex, $url, $matches) > 0) {
$url = preg_replace('#(recherche|recherche/[0-9]{1,10})#', 'recherche', $matches[3]);
$params['url'] = $url;
$params['context'] = 'Afficher les offres de véhicules disponible sur la recheche AutoJM';
return $params;
}
}
} }

View File

@ -14,10 +14,29 @@ class AwwwardsBridge extends BridgeAbstract
private $sites = []; private $sites = [];
public function getIcon()
{
return 'https://www.awwwards.com/favicon.ico';
}
private function fetchSites()
{
Debug::log('Fetching all sites');
$sites = getSimpleHTMLDOM(self::SITESURI);
Debug::log('Parsing all JSON data');
foreach ($sites->find('.grid-sites li') as $site) {
$decode = html_entity_decode($site->attr['data-collectable-model-value'], ENT_QUOTES, 'utf-8');
$decode = json_decode($decode, true);
$this->sites[] = $decode;
}
}
public function collectData() public function collectData()
{ {
$this->fetchSites(); $this->fetchSites();
Debug::log('Building RSS feed');
foreach ($this->sites as $site) { foreach ($this->sites as $site) {
$item = []; $item = [];
$item['title'] = $site['title']; $item['title'] = $site['title'];
@ -37,23 +56,4 @@ class AwwwardsBridge extends BridgeAbstract
} }
} }
} }
public function getIcon()
{
return 'https://www.awwwards.com/favicon.ico';
}
private function fetchSites()
{
$sites = getSimpleHTMLDOM(self::SITESURI);
foreach ($sites->find('.grid-sites li') as $li) {
$encodedJson = $li->attr['data-collectable-model-value'] ?? null;
if (!$encodedJson) {
continue;
}
$json = html_entity_decode($encodedJson, ENT_QUOTES, 'utf-8');
$site = Json::decode($json);
$this->sites[] = $site;
}
}
} }

View File

@ -29,7 +29,7 @@ class BAEBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$url = $this->getURI(); $url = $this->getURI();
$html = getSimpleHTMLDOM($url); $html = getSimpleHTMLDOM($url) or returnClientError('No results for this query.');
$annonces = $html->find('main article'); $annonces = $html->find('main article');
foreach ($annonces as $annonce) { foreach ($annonces as $annonce) {

View File

@ -1,254 +0,0 @@
<?php
class BMDSystemhausBlogBridge extends BridgeAbstract
{
const MAINTAINER = 'cn-tools';
const NAME = 'BMD SYSTEMHAUS GesmbH';
const CACHE_TIMEOUT = 21600; //6h
const URI = 'https://www.bmd.com';
const DONATION_URI = 'https://paypal.me/cntools';
const DESCRIPTION = 'BMD Systemhaus - We make business easy';
const BMD_FAV_ICON = 'https://www.bmd.com/favicon.ico';
const ITEMSTYLE = [
'ilcr' => '<table width="100%"><tr><td style="vertical-align: top;">{data_img}</td><td style="vertical-align: top;">{data_content}</td></tr></table>',
'clir' => '<table width="100%"><tr><td style="vertical-align: top;">{data_content}</td><td style="vertical-align: top;">{data_img}</td></tr></table>',
'itcb' => '<div>{data_img}<br />{data_content}</div>',
'ctib' => '<div>{data_content}<br />{data_img}</div>',
'co' => '{data_content}',
'io' => '{data_img}'
];
const PARAMETERS = [
'Blog' => [
'country' => [
'name' => 'Country',
'type' => 'list',
'values' => [
'Österreich' => 'at',
'Deutschland' => 'de',
'Schweiz' => 'ch',
'Slovensko' => 'sk',
'Cesko' => 'cz',
'Hungary' => 'hu',
],
'defaultValue' => 'at',
],
'style' => [
'name' => 'Style',
'type' => 'list',
'values' => [
'Image left, content right' => 'ilcr',
'Content left, image right' => 'clir',
'Image top, content bottom' => 'itcb',
'Content top, image bottom' => 'ctib',
'Content only' => 'co',
'Image only' => 'io',
],
'defaultValue' => 'ilcr',
]
]
];
//-----------------------------------------------------
/**
 * Read the blog overview page returned by getURI() and turn every news
 * list entry that has a title, content and link into a feed item.
 */
public function collectData()
{
    // get website content
    $html = getSimpleHTMLDOM($this->getURI());

    // Convert relative links in HTML into absolute links
    $html = defaultLinkTo($html, self::URI);

    // Convert lazy-loading images and frames (video embeds) into static elements
    $html = convertLazyLoading($html);

    foreach ($html->find('div#bmdNewsList div#bmdNewsList-Item') as $element) {
        $itemScope = $element->find('div[itemscope=itemscope]', 0);
        if (is_null($itemScope)) {
            // title and content are read from this container; without it the entry is unusable
            continue;
        }

        $item = [];

        // set base article data
        $item['title'] = $this->getMetaItemPropContent($itemScope, 'headline');
        $item['timestamp'] = strtotime($this->getMetaItemPropContent($itemScope, 'datePublished'));
        $item['author'] = $this->getMetaItemPropContent($itemScope->find('div[itemprop=author]', 0), 'name');

        // find article image
        $imageTag = '';
        $image = $element->find('div.mediaelement.mediaelement-image img', 0);
        if (!is_null($image) && $image->src != '') {
            $item['enclosures'] = [$image->src];
            $imageTag = '<img src="' . $image->src . '"/>';
        }

        // begin with right style; use the declared default 'ilcr' when no valid style is given
        $content = self::ITEMSTYLE[$this->getInput('style')] ?? self::ITEMSTYLE['ilcr'];

        // render placeholder
        $content = str_replace('{data_content}', $this->getMetaItemPropContent($itemScope, 'description'), $content);
        $content = str_replace('{data_img}', $imageTag, $content);

        // set finished content
        $item['content'] = $content;

        // get link to article (an empty uri makes the entry fail the final check below)
        $link = $element->find('div#bmdNewsList-Text div#bmdNewsList-Title a', 0);
        $item['uri'] = is_null($link) ? '' : $link->href;

        // init categories
        $categories = [];
        $tmpTwo = [];

        // search first categorie span
        $catElem = $element->find('div#bmdNewsList-Text div#bmdNewsList-Category span.news-list-category', 0);
        $txt = is_null($catElem) ? '' : trim($catElem->innertext);
        $tmpOne = explode('/', $txt);

        // split by 2 spaces
        // NOTE(review): written with an explicit quantifier so the pattern agrees with the
        // comment and survives whitespace-collapsing tools - confirm two spaces is intended
        foreach ($tmpOne as $tmpElem) {
            $tmpData = preg_split('/ {2}/', trim($tmpElem));
            $tmpTwo = array_merge($tmpTwo, $tmpData);
        }

        // split by tabulator
        foreach ($tmpTwo as $tmpElem) {
            $tmpData = preg_split('/\t+/', trim($tmpElem));
            $categories = array_merge($categories, $tmpData);
        }

        // trim each categorie entries
        $categories = array_map('trim', $categories);

        // remove empty entries; array_values() closes the key gaps array_filter() leaves behind
        $categories = array_values(array_filter($categories, function ($value) {
            return !is_null($value) && $value !== '';
        }));

        // set categories
        if (count($categories) > 0) {
            $item['categories'] = $categories;
        }

        // add item
        if ($item['title'] != '' && $item['content'] != '' && $item['uri'] != '') {
            $this->items[] = $item;
        }
    }
}
//-----------------------------------------------------
/**
 * Derive the 'country' parameter from a bmd.com URL.
 *
 * The country is taken from the first path segment of the URL. When that
 * is not a supported country, the browser's Accept-Language header is
 * consulted, and 'at' is used as the last resort.
 *
 * @param string $url
 * @return array|null Parameters for this bridge, or null when the URL is invalid or not on bmd.com
 */
public function detectParameters($url)
{
    try {
        $parsedUrl = Url::fromString($url);
    } catch (UrlException $e) {
        return null;
    }

    if (!in_array($parsedUrl->getHost(), ['www.bmd.com', 'bmd.com'])) {
        return null;
    }

    $lang = '';

    // extract language from url; only accept it when a blog URL exists for it
    $path = explode('/', $parsedUrl->getPath());
    if (count($path) > 1 && $this->getURIbyCountry($path[1]) != '') {
        $lang = $path[1];
    }

    // if no country available, find language by browser
    if ($lang == '') {
        // the header is absent for non-browser requests
        $acceptLanguage = $_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '';

        // only the languages listed before the first ';' are considered
        $srvLanguages = explode(';', $acceptLanguage);

        foreach (explode(',', $srvLanguages[0]) as $language) {
            $langDetails = explode('-', trim($language));

            // prefer the region part ("de-AT" -> "AT"), otherwise use the language itself
            $candidate = count($langDetails) > 1 ? $langDetails[1] : substr($langDetails[0], 0, 2);

            if ($this->getURIbyCountry($candidate) != '') {
                $lang = $candidate;
                break;
            }
        }
    }

    // if no URL found by language, use AT as default
    if ($lang == '') {
        $lang = 'at';
    }

    return ['country' => strtolower($lang)];
}
//-----------------------------------------------------
/**
 * Blog URL of the selected country, or the parent's URI when the
 * country input is missing or has no blog URL.
 */
public function getURI()
{
    $countryUri = $this->getURIbyCountry($this->getInput('country') ?? '');

    if ($countryUri != '') {
        return $countryUri;
    }

    return parent::getURI();
}
//-----------------------------------------------------
/**
 * Icon of this bridge: the fixed favicon URL held in BMD_FAV_ICON.
 *
 * @return string
 */
public function getIcon()
{
    return self::BMD_FAV_ICON;
}
//-----------------------------------------------------
/**
 * Read the 'content' attribute of the first <meta itemprop="$key"> below $elem.
 *
 * @param object|null $elem DOM element to search in
 * @param string $key Value of the itemprop attribute to look for
 * @return string Attribute value, or '' when the key is empty, $elem is null or no such meta tag exists
 */
private function getMetaItemPropContent($elem, $key)
{
    if ($key == '' || is_null($elem)) {
        return '';
    }

    $metaElem = $elem->find('meta[itemprop=' . $key . ']', 0);
    if (is_null($metaElem)) {
        return '';
    }

    return $metaElem->getAttribute('content');
}
//-----------------------------------------------------
/**
 * Map a country code (case-insensitive) to the URL of its blog overview page.
 *
 * @param string $country Country code such as 'at' or 'DE'
 * @return string Blog URL, or '' for a country without a known blog page
 */
private function getURIbyCountry($country)
{
    $blogUris = [
        'at' => 'https://www.bmd.com/at/ueber-bmd/blog-ohne-filter.html',
        'de' => 'https://www.bmd.com/de/das-ist-bmd/blog.html',
        'ch' => 'https://www.bmd.com/ch/das-ist-bmd/blog.html',
        'sk' => 'https://www.bmd.com/sk/firma/blog.html',
        'cz' => 'https://www.bmd.com/cz/firma/news-blog.html',
        'hu' => 'https://www.bmd.com/hu/rolunk/hirek.html',
    ];

    return $blogUris[strtolower($country)] ?? '';
}
}

View File

@ -138,7 +138,6 @@ class BadDragonBridge extends BridgeAbstract
// Sale // Sale
$regex = '/^(https?:\/\/)?bad-dragon\.com\/sales/'; $regex = '/^(https?:\/\/)?bad-dragon\.com\/sales/';
if (preg_match($regex, $url, $matches) > 0) { if (preg_match($regex, $url, $matches) > 0) {
$params['context'] = 'Sales';
return $params; return $params;
} }
@ -193,7 +192,6 @@ class BadDragonBridge extends BridgeAbstract
isset($urlParams['noAccessories']) isset($urlParams['noAccessories'])
&& $urlParams['noAccessories'] === '1' && $urlParams['noAccessories'] === '1'
&& $params['noAccessories'] = 'on'; && $params['noAccessories'] = 'on';
$params['context'] = 'Clearance';
return $params; return $params;
} }
@ -284,7 +282,8 @@ class BadDragonBridge extends BridgeAbstract
case 'Clearance': case 'Clearance':
$toyData = json_decode(getContents($this->inputToURL(true))); $toyData = json_decode(getContents($this->inputToURL(true)));
$productList = json_decode(getContents(self::URI . 'api/inventory-toy/product-list')); $productList = json_decode(getContents(self::URI
. 'api/inventory-toy/product-list'));
foreach ($toyData->toys as $toy) { foreach ($toyData->toys as $toy) {
$item = []; $item = [];

View File

@ -112,11 +112,11 @@ class BandcampBridge extends BridgeAbstract
$data = $this->buildRequestJson(); $data = $this->buildRequestJson();
$header = [ $header = [
'Content-Type: application/json', 'Content-Type: application/json',
'Content-Length: ' . strlen($data), 'Content-Length: ' . strlen($data)
]; ];
$opts = [ $opts = [
CURLOPT_CUSTOMREQUEST => 'POST', CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS => $data, CURLOPT_POSTFIELDS => $data
]; ];
$content = getContents($url, $header, $opts); $content = getContents($url, $header, $opts);
@ -314,8 +314,7 @@ class BandcampBridge extends BridgeAbstract
{ {
$url = self::URI . 'api/' . $endpoint . '?' . http_build_query($query_data); $url = self::URI . 'api/' . $endpoint . '?' . http_build_query($query_data);
// todo: 429 Too Many Requests happens a lot // todo: 429 Too Many Requests happens a lot
$response = getContents($url); $data = json_decode(getContents($url));
$data = json_decode($response);
return $data; return $data;
} }
@ -398,7 +397,6 @@ class BandcampBridge extends BridgeAbstract
// By tag // By tag
$regex = '/^(https?:\/\/)?bandcamp\.com\/tag\/([^\/.&?\n]+)/'; $regex = '/^(https?:\/\/)?bandcamp\.com\/tag\/([^\/.&?\n]+)/';
if (preg_match($regex, $url, $matches) > 0) { if (preg_match($regex, $url, $matches) > 0) {
$params['context'] = 'By tag';
$params['tag'] = urldecode($matches[2]); $params['tag'] = urldecode($matches[2]);
return $params; return $params;
} }
@ -406,7 +404,6 @@ class BandcampBridge extends BridgeAbstract
// By band // By band
$regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com/'; $regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com/';
if (preg_match($regex, $url, $matches) > 0) { if (preg_match($regex, $url, $matches) > 0) {
$params['context'] = 'By band';
$params['band'] = urldecode($matches[2]); $params['band'] = urldecode($matches[2]);
return $params; return $params;
} }
@ -414,7 +411,6 @@ class BandcampBridge extends BridgeAbstract
// By album // By album
$regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com\/album\/([^\/.&?\n]+)/'; $regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com\/album\/([^\/.&?\n]+)/';
if (preg_match($regex, $url, $matches) > 0) { if (preg_match($regex, $url, $matches) > 0) {
$params['context'] = 'By album';
$params['band'] = urldecode($matches[2]); $params['band'] = urldecode($matches[2]);
$params['album'] = urldecode($matches[3]); $params['album'] = urldecode($matches[3]);
return $params; return $params;

View File

@ -93,7 +93,8 @@ class BandcampDailyBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$html = getSimpleHTMLDOM($this->getURI()); $html = getSimpleHTMLDOM($this->getURI())
or returnServerError('Could not request: ' . $this->getURI());
$html = defaultLinkTo($html, self::URI); $html = defaultLinkTo($html, self::URI);
@ -104,7 +105,8 @@ class BandcampDailyBridge extends BridgeAbstract
$articlePath = $article->find('a.title', 0)->href; $articlePath = $article->find('a.title', 0)->href;
$articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600); $articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600)
or returnServerError('Could not request: ' . $articlePath);
$item['uri'] = $articlePath; $item['uri'] = $articlePath;
$item['title'] = $articlePageHtml->find('article-title', 0)->innertext; $item['title'] = $articlePageHtml->find('article-title', 0)->innertext;

View File

@ -1,139 +0,0 @@
<?php
class BazarakiBridge extends BridgeAbstract
{
const NAME = 'Bazaraki Bridge';
const URI = 'https://bazaraki.com';
const DESCRIPTION = 'Fetch adverts from Bazaraki, a Cyprus-based classifieds website.';
const MAINTAINER = 'danwain';
const PARAMETERS = [
[
'url' => [
'name' => 'URL',
'type' => 'text',
'required' => true,
'title' => 'Enter the URL of the Bazaraki page to fetch adverts from.',
'exampleValue' => 'https://www.bazaraki.com/real-estate-for-sale/houses/?lat=0&lng=0&radius=100000',
],
'limit' => [
'name' => 'Limit',
'type' => 'number',
'required' => false,
'title' => 'Enter the number of adverts to fetch. (max 50)',
'exampleValue' => '10',
'defaultValue' => 10,
]
]
];
/**
 * Fetch the listing page given by the 'url' input and build one feed item
 * per advert, loading each advert's own page for price, date, author and content.
 *
 * @throws \Exception when the URL does not point to www.bazaraki.com
 */
public function collectData()
{
    $url = $this->getInput('url');
    if (! str_starts_with($url, 'https://www.bazaraki.com/')) {
        throw new \Exception('The URL must start with https://www.bazaraki.com/');
    }

    // An empty or non-positive limit falls back to the declared default (10); never more than 50
    $limit = (int) $this->getInput('limit');
    if ($limit < 1) {
        $limit = 10;
    }
    $limit = min($limit, 50);

    $html = getSimpleHTMLDOM($url);

    foreach ($html->find('div.advert') as $element) {
        if (count($this->items) >= $limit) {
            break;
        }

        $link = $element->find('a.advert__content-title', 0);
        if (is_null($link)) {
            // no title link means no advert page to load
            continue;
        }

        $item = [];
        $item['uri'] = 'https://www.bazaraki.com' . $link->href;

        # Get the content
        $advert = getSimpleHTMLDOM($item['uri']);

        $price = trim($advert->find('div.announcement-price__cost', 0)?->plaintext ?? '');
        $name = trim($link->plaintext);
        $item['title'] = $price === '' ? $name : $name . ' - ' . $price;

        $time = trim($advert->find('span.date-meta', 0)?->plaintext ?? '');
        $time = str_replace('Posted: ', '', $time);
        if ($time !== '') {
            $item['timestamp'] = $this->convertRelativeTime($time);
        }

        $item['content'] = $this->processAdvertContent($advert);
        $item['author'] = trim($advert->find('div.author-name', 0)?->plaintext ?? '');

        // Left untrimmed on purpose so uids stay the same as before; the uri is the fallback
        $item['uid'] = $advert->find('span.number-announcement', 0)?->plaintext ?? $item['uri'];

        $this->items[] = $item;
    }
}
/**
 * Process the advert content to clean up HTML
 *
 * Builds the item content from the header, characteristics, description
 * and images sections of the advert page, separated by <hr/>. A section
 * that is not present on the page is left out.
 *
 * @param simple_html_dom $advert The SimpleHTMLDOM object for the advert page
 * @return string Processed HTML content
 */
private function processAdvertContent($advert)
{
    // Section heading (empty = none) => selector, in output order
    $sections = [
        ['', 'div.announcement-content-header'],
        ['<h3>Details</h3>', 'div.announcement-characteristics'],
        ['<h3>Description</h3>', 'div.js-description'],
        ['<h3>Images</h3>', 'div.announcement__images'],
    ];

    // Look the sections up before the document is modified below
    $found = [];
    foreach ($sections as [$heading, $selector]) {
        $node = $advert->find($selector, 0);
        if (!is_null($node)) {
            $found[] = [$heading, $node];
        }
    }

    // Remove all favorites divs
    foreach ($advert->find('div.announcement-meta__favorites') as $favorites) {
        $favorites->outertext = '';
    }

    // Replace all <a> tags with their text content
    foreach ($advert->find('a') as $a) {
        $a->outertext = $a->innertext;
    }

    // innertext is read only now, after the clean-up above
    $parts = [];
    foreach ($found as [$heading, $node]) {
        $parts[] = $heading . $node->innertext;
    }

    return implode('<hr/>', $parts);
}
/**
 * Turn a relative time such as "Yesterday 12:32" or "Today 08:15" into an
 * absolute 'Y-m-d H:i:s' string.
 *
 * @param string $timeString The relative time string from the website
 * @return string Absolute date string, or the input unchanged when it contains neither keyword
 */
private function convertRelativeTime($timeString)
{
    // Checked in this order: "Yesterday" wins when both words are present
    $keywords = [
        'Yesterday' => date('Y-m-d', strtotime('-1 day')),
        'Today' => date('Y-m-d'),
    ];

    foreach ($keywords as $word => $isoDate) {
        if (strpos($timeString, $word) === false) {
            continue;
        }

        $absolute = str_replace($word, $isoDate, $timeString);
        return date('Y-m-d H:i:s', strtotime($absolute));
    }

    // Any other format is handed back untouched for strtotime() to deal with
    return $timeString;
}
}

View File

@ -8,27 +8,48 @@ class BinanceBridge extends BridgeAbstract
const MAINTAINER = 'thefranke'; const MAINTAINER = 'thefranke';
const CACHE_TIMEOUT = 3600; // 1h const CACHE_TIMEOUT = 3600; // 1h
public function collectData()
{
$url = 'https://www.binance.com/bapi/composite/v1/public/content/blog/list?category=&tag=&page=1&size=12';
$json = getContents($url);
$data = Json::decode($json, false);
foreach ($data->data->blogList as $post) {
$item = [];
$item['title'] = $post->title;
// Url slug not in json
//$item['uri'] = $uri;
$item['timestamp'] = $post->postTimeUTC / 1000;
$item['author'] = 'Binance';
$item['content'] = $post->brief;
//$item['categories'] = $category;
$item['uid'] = $post->idStr;
$this->items[] = $item;
}
}
public function getIcon() public function getIcon()
{ {
return 'https://bin.bnbstatic.com/static/images/common/favicon.ico'; return 'https://bin.bnbstatic.com/static/images/common/favicon.ico';
} }
public function collectData()
{
$html = getSimpleHTMLDOM(self::URI)
or returnServerError('Could not fetch Binance blog data.');
$appData = $html->find('script[id="__APP_DATA"]');
$appDataJson = json_decode($appData[0]->innertext);
$allposts = $appDataJson->routeProps->f3ac->blogListRes->list;
foreach ($allposts as $element) {
$date = $element->releasedTime;
$title = $element->title;
$category = $element->category->name;
$suburl = strtolower($category);
$suburl = str_replace(' ', '_', $suburl);
$uri = self::URI . '/' . $suburl . '/' . $element->idStr;
$contentHTML = getSimpleHTMLDOMCached($uri);
$contentAppData = $contentHTML->find('script[id="__APP_DATA"]');
$contentAppDataJson = json_decode($contentAppData[0]->innertext);
$content = $contentAppDataJson->routeProps->a106->blogDetail->content;
$item = [];
$item['title'] = $title;
$item['uri'] = $uri;
$item['timestamp'] = substr($date, 0, -3);
$item['author'] = 'Binance';
$item['content'] = $content;
$item['categories'] = $category;
$this->items[] = $item;
if (count($this->items) >= 10) {
break;
}
}
}
} }

View File

@ -7,14 +7,10 @@ class BleepingComputerBridge extends FeedExpander
const URI = 'https://www.bleepingcomputer.com/'; const URI = 'https://www.bleepingcomputer.com/';
const DESCRIPTION = 'Returns the newest articles.'; const DESCRIPTION = 'Returns the newest articles.';
public function collectData() protected function parseItem($item)
{ {
$feed = static::URI . 'feed/'; $item = parent::parseItem($item);
$this->collectExpandableDatas($feed);
}
protected function parseItem(array $item)
{
$article_html = getSimpleHTMLDOMCached($item['uri']); $article_html = getSimpleHTMLDOMCached($item['uri']);
if (!$article_html) { if (!$article_html) {
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>'; $item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
@ -27,4 +23,10 @@ class BleepingComputerBridge extends FeedExpander
return $item; return $item;
} }
public function collectData()
{
$feed = static::URI . 'feed/';
$this->collectExpandableDatas($feed);
}
} }

View File

@ -1,6 +1,6 @@
<?php <?php
class BlizzardNewsBridge extends BridgeAbstract class BlizzardNewsBridge extends XPathAbstract
{ {
const NAME = 'Blizzard News'; const NAME = 'Blizzard News';
const URI = 'https://news.blizzard.com'; const URI = 'https://news.blizzard.com';
@ -35,73 +35,26 @@ class BlizzardNewsBridge extends BridgeAbstract
]; ];
const CACHE_TIMEOUT = 3600; const CACHE_TIMEOUT = 3600;
private const PRODUCT_IDS = [ const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article';
'blt525c436e4a1b0a97', const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2';
'blt54fbd3787a705054', const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]';
'blt2031aef34200656d', const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href';
'blt795c314400d7ded9', const XPATH_EXPRESSION_ITEM_AUTHOR = '';
'blt5cfc6affa3ca0638', const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp';
'blt2e50e1521bb84dc6', const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/div[@class="ArticleListItem-image"]/@style';
'blt376fb94931906b6f', const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="ArticleListItem-label"]';
'blt81d46fcb05ab8811', const SETTING_FIX_ENCODING = true;
'bltede2389c0a8885aa',
'blt24859ba8086fb294',
'blte27d02816a8ff3e1',
'blt2caca37e42f19839',
'blt90855744d00cd378',
'bltec70ad0ea4fd6d1d',
'blt500c1f8b5470bfdb'
];
private const API_PATH = '/api/news/blizzard?';
/** /**
* Source Web page URL (should provide either HTML or XML content) * Source Web page URL (should provide either HTML or XML content)
* @return string * @return string
*/ */
private function getSourceUrl(): string protected function getSourceUrl()
{ {
$locale = $this->getInput('locale'); $locale = $this->getInput('locale');
if ('zh-cn' === $locale) { if ('zh-cn' === $locale) {
$baseUrl = 'https://cn.news.blizzard.com' . self::API_PATH; return 'https://cn.news.blizzard.com';
} else {
$baseUrl = 'https://news.blizzard.com/' . $locale . self::API_PATH;
} }
return $baseUrl .= http_build_query([ return 'https://news.blizzard.com/' . $locale;
'feedCxpProductIds' => self::PRODUCT_IDS
]);
}
public function collectData()
{
$feedContent = json_decode(getContents($this->getSourceUrl()), true);
foreach ($feedContent['feed']['contentItems'] as $entry) {
$properties = $entry['properties'];
$item = [];
$item['title'] = $this->filterChars($properties['title']);
$item['content'] = $this->filterChars($properties['summary']);
$item['uri'] = $properties['newsUrl'];
$item['author'] = $this->filterChars($properties['author']);
$item['timestamp'] = strtotime($properties['lastUpdated']);
$item['enclosures'] = [$properties['staticAsset']['imageUrl']];
$item['categories'] = [$this->filterChars($properties['cxpProduct']['title'])];
$this->items[] = $item;
}
}
private function filterChars($content)
{
return htmlspecialchars($content, ENT_XML1);
}
public function getIcon()
{
return <<<icon
https://dfbmfbnnydoln.cloudfront.net/production/images/favicons/favicon.ba01bb119359d74970b02902472fd82e96b5aba7.ico
icon;
} }
} }

View File

@ -1,670 +0,0 @@
<?php
class BlueskyBridge extends BridgeAbstract
{
//Initial PR by [RSSBridge contributors](https://github.com/RSS-Bridge/rss-bridge/issues/4058).
//Modified from [©DIYgod and contributors at RSSHub](https://github.com/DIYgod/RSSHub/tree/master/lib/routes/bsky), MIT License.
const NAME = 'Bluesky Bridge';
const URI = 'https://bsky.app';
const DESCRIPTION = 'Fetches posts from Bluesky';
const MAINTAINER = 'mruac';
const PARAMETERS = [
[
'data_source' => [
'name' => 'Bluesky Data Source',
'type' => 'list',
'defaultValue' => 'Profile',
'values' => [
'Profile' => 'getAuthorFeed',
],
'title' => 'Select the type of data source to fetch from Bluesky.'
],
'user_id' => [
'name' => 'User Handle or DID',
'type' => 'text',
'required' => true,
'exampleValue' => 'did:plc:z72i7hdynmk6r22z27h6tvur',
'title' => 'ATProto / Bsky.app handle or DID'
],
'feed_filter' => [
'name' => 'Feed type',
'type' => 'list',
'defaultValue' => 'posts_and_author_threads',
'values' => [
'Posts feed' => 'posts_and_author_threads',
'All posts and replies' => 'posts_with_replies',
'Root posts only' => 'posts_no_replies',
'Media only' => 'posts_with_media',
]
],
'include_reposts' => [
'name' => 'Include Reposts?',
'type' => 'checkbox',
'defaultValue' => 'checked'
],
'include_reply_context' => [
'name' => 'Include Reply context?',
'type' => 'checkbox'
],
'verbose_title' => [
'name' => 'Use verbose feed item titles?',
'type' => 'checkbox'
]
]
];
private $profile;
/**
 * Feed name: "Bluesky - <display name> (@<handle>)" once a profile has been loaded,
 * otherwise the bridge's static name.
 *
 * The display name is treated as optional (as it is for post authors elsewhere in
 * this class); when it is missing the handle, or the DID for unverified handles,
 * is used on its own.
 *
 * @return string
 */
public function getName()
{
    if (!isset($this->profile)) {
        return parent::getName();
    }

    $displayName = $this->profile['displayName'] ?? '';
    // 'handle.invalid' is the placeholder the API returns when handle verification failed.
    $hasValidHandle = $this->profile['handle'] !== 'handle.invalid';

    if ($displayName === '') {
        $fallback = $hasValidHandle ? '@' . $this->profile['handle'] : $this->profile['did'];
        return sprintf('Bluesky - %s', $fallback);
    }
    if (!$hasValidHandle) {
        return sprintf('Bluesky - %s', $displayName);
    }
    return sprintf('Bluesky - %s (@%s)', $displayName, $this->profile['handle']);
}
/**
 * Link to the profile page on bsky.app once a profile has been loaded,
 * otherwise the bridge's static URI.
 *
 * @return string
 */
public function getURI()
{
    if (!isset($this->profile)) {
        return parent::getURI();
    }

    // Profiles whose handle failed verification are addressed by DID instead.
    $identifier = $this->profile['handle'] === 'handle.invalid'
        ? $this->profile['did']
        : $this->profile['handle'];

    return self::URI . '/profile/' . $identifier;
}
/**
 * Feed icon: the profile avatar when a profile is loaded and has one,
 * otherwise the parent's default icon.
 *
 * @return string
 */
public function getIcon()
{
    // `??` covers both "no profile loaded yet" and "profile has no avatar".
    return $this->profile['avatar'] ?? parent::getIcon();
}
/**
 * Feed description: the profile bio when a profile is loaded and has one,
 * otherwise the parent's default description.
 *
 * @return string
 */
public function getDescription()
{
    // `??` covers both "no profile loaded yet" and "profile has no description".
    return $this->profile['description'] ?? parent::getDescription();
}
/**
 * Render an external-link embed as HTML.
 *
 * Tenor GIF links become an inline image linking to the Tenor interstitial page;
 * any other link becomes a blockquote with title, optional thumbnail and description.
 *
 * @param array  $external Embed data; reads 'uri', 'title', 'description' and optional 'thumb'.
 * @param string $did      DID of the post author, used to build the thumbnail CDN URL.
 * @return string HTML fragment
 */
private function parseExternal($external, $did)
{
    $externalUri = $external['uri'];
    // The URI is remote data placed inside HTML attributes, so it is escaped like the text fields.
    $safeUri = e($externalUri);
    $externalTitle = e($external['title']);
    $externalDescription = e($external['description']);
    $thumb = $external['thumb'] ?? null;

    if (preg_match('/http(|s):\/\/media\.tenor\.com/', $externalUri)) {
        //tenor gif embed
        $tenorInterstitial = e(str_replace('media.tenor.com', 'media1.tenor.com/m', $externalUri));
        return "<figure><a href=\"$tenorInterstitial\"><img src=\"$safeUri\"/></a><figcaption>$externalTitle</figcaption></figure>";
    }

    //link embed preview
    // PHP_URL_HOST yields null/false instead of an undefined-index error for host-less or malformed URIs.
    $host = parse_url($externalUri, PHP_URL_HOST);
    $hostPrefix = is_string($host) && $host !== '' ? '(' . e($host) . ') ' : '';
    $thumbDesc = $thumb ? ('<img src="https://cdn.bsky.app/img/feed_thumbnail/plain/' . $did . '/' . $thumb['ref']['$link'] . '@jpeg"/>') : '';
    $caption = strlen($externalDescription) > 0 ? "<figcaption>$hostPrefix$externalDescription</figcaption>" : '';

    $description = '<br><blockquote><b><a href="' . $safeUri . '">' . $externalTitle . '</a></b>';
    $description .= '<figure>' . $thumbDesc . $caption . '</figure></blockquote>';
    return $description;
}
/**
 * Convert a post record's text to HTML: escape it, turn newlines into <br>,
 * and wrap link facets in anchors.
 *
 * Facet positions are byte offsets into the raw text, so the output is assembled
 * segment by segment from the raw text. Searching for the link text in the
 * already-escaped string would miss text containing escapable characters and
 * would rewrite every occurrence of the same text, not just the annotated one.
 *
 * @param array $record Post record, or a record view that wraps the record under 'value'.
 * @return string HTML fragment
 */
private function textToDescription($record)
{
    if (isset($record['value'])) {
        $record = $record['value'];
    }
    $text = $record['text'];
    $length = strlen($text);

    // Only facets whose first feature is a link are rendered (same rule as before).
    $links = [];
    foreach ($record['facets'] ?? [] as $facet) {
        if (($facet['features'][0]['$type'] ?? '') === 'app.bsky.richtext.facet#link') {
            $links[] = $facet;
        }
    }
    usort($links, function ($a, $b) {
        return $a['index']['byteStart'] <=> $b['index']['byteStart'];
    });

    $html = '';
    $cursor = 0;
    foreach ($links as $link) {
        $start = $link['index']['byteStart'];
        $end = min($link['index']['byteEnd'], $length);
        // Skip empty, out-of-range or overlapping facets rather than emitting nested anchors.
        if ($start < $cursor || $end <= $start) {
            continue;
        }
        $html .= nl2br(e((string) substr($text, $cursor, $start - $cursor)));
        $html .= '<a href="' . e($link['features'][0]['uri']) . '">'
            . nl2br(e((string) substr($text, $start, $end - $start)))
            . '</a>';
        $cursor = $end;
    }
    $html .= nl2br(e((string) substr($text, $cursor)));

    return $html;
}
/**
 * Resolve the requested account, fetch its author feed and turn every feed entry
 * into a feed item (uri, title, timestamp, author, HTML content).
 *
 * NOTE(review): the 'include_reposts' parameter is declared in PARAMETERS but is
 * not read anywhere in this class - confirm whether reposts should be filtered here.
 */
public function collectData()
{
    $user_id = $this->getInput('user_id');
    // The capture group ends up holding the last label (the TLD) in $handle_res[1].
    $handle_match = preg_match('/(?:[a-zA-Z]*\.)+([a-zA-Z](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/', $user_id, $handle_res);
    $did_match = preg_match('/did:plc:[a-z2-7]{24}/', $user_id); //https://github.com/did-method-plc/did-method-plc#identifier-syntax
    $exclude = ['alt', 'arpa', 'example', 'internal', 'invalid', 'local', 'localhost', 'onion']; //https://en.wikipedia.org/wiki/Top-level_domain#Reserved_domains

    // in_array() instead of `array_search(...) == false`: array_search returns index 0 for
    // the first entry ('alt'), which loosely equals false and let that reserved TLD through.
    if ($handle_match === 1 && !in_array(strtolower($handle_res[1]), $exclude, true)) {
        //valid bsky handle
        $did = $this->resolveHandle($user_id);
    } elseif ($did_match === 1) {
        //valid DID
        $did = $user_id;
    } else {
        returnClientError('Invalid ATproto handle or DID provided.');
    }

    $filter = $this->getInput('feed_filter') ?: 'posts_and_author_threads';
    $replyContext = $this->getInput('include_reply_context');

    $this->profile = $this->getProfile($did);
    $authorFeed = $this->getAuthorFeed($did, $filter);

    foreach ($authorFeed['feed'] as $post) {
        $postView = $post['post'];
        $postRecord = $postView['record'];

        $item = [];
        $item['uri'] = self::URI . '/profile/' . $this->fallbackAuthor($postView['author'], 'url') . '/post/' . explode('app.bsky.feed.post/', $postView['uri'])[1];
        $item['title'] = $this->getInput('verbose_title') ? $this->generateVerboseTitle($post) : strtok($postRecord['text'], "\n");
        $item['timestamp'] = strtotime($postRecord['createdAt']);
        $item['author'] = $this->fallbackAuthor($postView['author'], 'display');

        if (Debug::isEnabled()) {
            $url = explode('/', $postView['uri']);
            $this->logger->debug('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]);
        }

        //post
        $description = $this->renderPostView($postView, 'post');

        //reply - skipped when the parent is deleted or blocked, because those
        //placeholder entries carry no record/author to render.
        if (
            $replyContext &&
            isset($post['reply']) &&
            !isset($post['reply']['parent']['notFound']) &&
            !isset($post['reply']['parent']['blocked'])
        ) {
            $description .= '<hr/>';
            $description .= $this->renderPostView($post['reply']['parent'], 'reply');
        }

        $item['content'] = $description;
        $this->items[] = $item;
    }
}

/**
 * Render one post view (the feed post itself or its reply parent): header and text,
 * link/image/video embeds, then the quoted record if there is one.
 *
 * @param array  $postView Post view; reads 'record', 'author', 'uri' and 'embed'.
 * @param string $type     'post' or 'reply', forwarded to getPostDescription().
 * @return string HTML fragment
 */
private function renderPostView(array $postView, string $type): string
{
    $record = $postView['record'];
    $author = $postView['author'];
    $authorDid = $author['did'];
    $authorHandle = $author['handle'] !== 'handle.invalid' ? '<i>@' . $author['handle'] . '</i> ' : '';
    $displayName = e($author['displayName'] ?? '');
    $postUri = self::URI . '/profile/' . $this->fallbackAuthor($author, 'url') . '/post/' . explode('app.bsky.feed.post/', $postView['uri'])[1];

    $html = '<p>';
    $html .= $this->getPostDescription($displayName, $authorHandle, $postUri, $record, $type);

    if (isset($record['embed']['$type'])) {
        $embed = $record['embed'];
        // For recordWithMedia the media part sits one level down under 'media'.
        $media = $embed['$type'] === 'app.bsky.embed.recordWithMedia' ? $embed['media'] : $embed;
        $mediaType = $media['$type'];

        if ($mediaType === 'app.bsky.embed.external') {
            //post link embed
            $html .= $this->parseExternal($media['external'], $authorDid);
        }
        if ($mediaType === 'app.bsky.embed.images') {
            //post images - image URLs are read from the view, not from the record
            $images = $postView['embed']['images'] ?? $postView['embed']['media']['images'];
            foreach ($images as $image) {
                $html .= $this->getPostImageDescription($image);
            }
        }
        if ($mediaType === 'app.bsky.embed.video') {
            //post video
            $html .= $this->getPostVideoDescription($media['video'], $authorDid);
        }
    }
    $html .= '</p>';

    $html .= $this->renderQuotedRecord($record, $postView);

    return $html;
}

/**
 * Render the record quoted by a post (another post, a feed/list, or a starter pack).
 *
 * @param array $record   The quoting post's record; its embed type decides whether a quote exists.
 * @param array $postView The quoting post's view; the quoted content is read from its 'embed'.
 * @return string HTML fragment, or '' when the post quotes nothing.
 */
private function renderQuotedRecord(array $record, array $postView): string
{
    $embedType = $record['embed']['$type'] ?? '';
    $isQuote = $embedType === 'app.bsky.embed.record' || $embedType === 'app.bsky.embed.recordWithMedia';
    if (!$isQuote || !isset($postView['embed']['record'])) {
        return '';
    }

    $embedView = $postView['embed']['record'];
    // recordWithMedia views nest the quoted record one level deeper.
    $quoted = $embedView['record'] ?? $embedView;
    $quotedType = $quoted['$type'] ?? '';

    $html = '<p>';
    if (isset($quoted['notFound']) && $quoted['notFound']) { //deleted post
        $html .= 'Quoted post deleted.';
    } elseif (isset($quoted['detached']) && $quoted['detached']) { //detached quote
        $uri_explode = explode('/', $quoted['uri']);
        $uri_reconstructed = self::URI . '/profile/' . $uri_explode[2] . '/post/' . $uri_explode[4];
        $html .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
    } elseif (isset($quoted['blocked']) && $quoted['blocked']) { //blocked by quote author
        $html .= 'Author of quoted post has blocked OP.';
    } elseif ($quotedType === 'app.bsky.feed.defs#generatorView' || $quotedType === 'app.bsky.graph.defs#listView') {
        $html .= $this->getListFeedDescription($quoted);
    } elseif ($quotedType === 'app.bsky.graph.starterpack' || $quotedType === 'app.bsky.graph.defs#starterPackViewBasic') {
        $html .= $this->getStarterPackDescription($embedView);
    } else {
        $quotedAuthorDid = $quoted['author']['did'];
        $quotedDisplayName = e($quoted['author']['displayName'] ?? '');
        $quotedAuthorHandle = $quoted['author']['handle'] !== 'handle.invalid' ? '<i>@' . $quoted['author']['handle'] . '</i>' : '';
        $parts = explode('/', $quoted['uri']);
        $quotedPostId = end($parts);
        $quotedPostUri = self::URI . '/profile/' . $this->fallbackAuthor($quoted['author'], 'url') . '/post/' . $quotedPostId;

        //quoted post - post
        $html .= $this->getPostDescription($quotedDisplayName, $quotedAuthorHandle, $quotedPostUri, $quoted, 'quote');

        if (isset($quoted['value']['embed']['$type'])) {
            $quotedEmbed = $quoted['value']['embed'];
            $quotedEmbedType = $quotedEmbed['$type'];
            $withMedia = $quotedEmbedType === 'app.bsky.embed.recordWithMedia';

            //quoted post - post link embed
            if ($quotedEmbedType === 'app.bsky.embed.external') {
                $html .= $this->parseExternal($quotedEmbed['external'], $quotedAuthorDid);
            }

            //quoted post - post video
            if (
                $quotedEmbedType === 'app.bsky.embed.video' ||
                ($withMedia && $quotedEmbed['media']['$type'] === 'app.bsky.embed.video')
            ) {
                $html .= $this->getPostVideoDescription(
                    $quotedEmbed['video'] ?? $quotedEmbed['media']['video'],
                    $quotedAuthorDid
                );
            }

            //quoted post - post images
            if (
                $quotedEmbedType === 'app.bsky.embed.images' ||
                ($withMedia && $quotedEmbed['media']['$type'] === 'app.bsky.embed.images')
            ) {
                foreach ($quoted['embeds'] ?? [] as $embed) {
                    if (
                        $embed['$type'] === 'app.bsky.embed.images#view' ||
                        ($embed['$type'] === 'app.bsky.embed.recordWithMedia#view' && $embed['media']['$type'] === 'app.bsky.embed.images#view')
                    ) {
                        $images = $embed['images'] ?? $embed['media']['images'];
                        foreach ($images as $image) {
                            $html .= $this->getPostImageDescription($image);
                        }
                    }
                }
            }
        }
    }
    $html .= '</p>';

    return $html;
}
/**
 * Render a video embed as a <video> element with a poster image.
 *
 * @param array  $video     Video blob; reads 'ref'.'$link' (the blob CID) and 'mimeType'.
 * @param string $authorDID DID of the account that owns the blob.
 * @return string HTML fragment
 */
private function getPostVideoDescription(array $video, $authorDID)
{
    //https://video.bsky.app/watch/$did/$cid/thumbnail.jpg
    $videoCID = $video['ref']['$link'];
    $videoMime = $video['mimeType'];
    // The former `?? ''` was a no-op here: an interpolated string literal is never null.
    $thumbnail = "poster=\"https://video.bsky.app/watch/$authorDID/$videoCID/thumbnail.jpg\"";
    $videoURL = "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=$authorDID&cid=$videoCID";
    return "<figure><video loop $thumbnail controls src=\"$videoURL\" type=\"$videoMime\"/></figure>";
}
/**
 * Render one image view as a thumbnail linking to the full-size image,
 * captioned with its alt text when that is non-empty.
 *
 * @param array $image Image view; reads 'thumb', 'fullsize' and 'alt'.
 * @return string HTML fragment
 */
private function getPostImageDescription(array $image)
{
    $caption = '';
    if (strlen($image['alt']) > 0) {
        $caption = '<figcaption>' . e($image['alt']) . '</figcaption>';
    }

    return '<figure><a href="' . $image['fullsize'] . '"><img src="' . $image['thumb'] . '"></a>' . $caption . '</figure>';
}
/**
 * Build the attribution line for a post followed by its rendered text.
 *
 * @param string $postDisplayName  Author display name (already escaped by the callers in this class).
 * @param string $postAuthorHandle Pre-formatted author handle markup, possibly empty.
 * @param string $postUri          Link to the post on bsky.app.
 * @param array  $postRecord       Post record (or record view) passed on to textToDescription().
 * @param string $type             'quote', 'reply', or anything else for a regular post.
 * @return string HTML fragment
 */
private function getPostDescription(
    string $postDisplayName,
    string $postAuthorHandle,
    string $postUri,
    array $postRecord,
    string $type
) {
    // A record carrying a 'reply' key is itself a reply rather than a root post.
    $kind = isset($postRecord['reply']) ? 'reply' : 'post';

    if ($type === 'quote') {
        // Quoted post/reply from bbb @bbb.com:
        $header = "<a href=\"$postUri\">Quoted $kind</a> from <b>$postDisplayName</b> $postAuthorHandle:<br>";
    } elseif ($type === 'reply') {
        // Replying to aaa @aaa.com's post/reply:
        $header = "Replying to <b>$postDisplayName</b> $postAuthorHandle's <a href=\"$postUri\">$kind</a>:<br>";
    } else {
        // aaa @aaa.com posted:
        $header = "<b>$postDisplayName</b> $postAuthorHandle <a href=\"$postUri\">posted</a>:<br>";
    }

    return $header . $this->textToDescription($postRecord);
}
/**
 * Pick the identifier to show for an author.
 *
 * Returns the handle unless handle verification failed ('handle.invalid'); in that
 * case the DID is returned for 'url' and the escaped display name for 'display'.
 * Any other $reason still yields the handle as-is.
 *
 * @param array  $author Author view; reads 'handle', 'did' and optional 'displayName'.
 * @param string $reason 'url' or 'display'.
 * @return string
 */
private function fallbackAuthor($author, $reason)
{
    $handleIsInvalid = $author['handle'] === 'handle.invalid';

    if ($handleIsInvalid && $reason === 'url') {
        return $author['did'];
    }
    if ($handleIsInvalid && $reason === 'display') {
        return e($author['displayName'] ?? '');
    }

    return $author['handle'];
}
/**
 * Build a title of the form "Post by A, replying to B, quoting C"
 * (or "Repost by X, post by A, ...") instead of using the post text.
 *
 * @param array $post Feed entry; reads 'post', and optional 'reason' and 'reply'.
 * @return string
 */
private function generateVerboseTitle($post)
{
    $postAuthor = $this->fallbackAuthor($post['post']['author'], 'display');

    if (isset($post['reason']) && str_contains($post['reason']['$type'], 'reasonRepost')) {
        $title = 'Repost by ' . $this->fallbackAuthor($post['reason']['by'], 'display') . ', post by ' . $postAuthor;
    } else {
        $title = 'Post by ' . $postAuthor;
    }

    if (isset($post['reply'])) {
        $parent = $post['reply']['parent'];
        if (isset($parent['blocked'])) {
            $replyAuthor = 'blocked user';
        } elseif (isset($parent['notFound'])) {
            $replyAuthor = 'deleted post';
        } else {
            $replyAuthor = $this->fallbackAuthor($parent['author'], 'display');
        }
        $title .= ', replying to ' . $replyAuthor;
    }

    $embedRecord = $post['post']['embed']['record'] ?? null;
    //starter packs, feeds and lists have no quoted author to name
    $nonPostTypes = [
        'app.bsky.feed.defs#generatorView',
        'app.bsky.graph.defs#listView',
        'app.bsky.graph.defs#starterPackViewBasic',
    ];
    if (isset($embedRecord) && !in_array($embedRecord['$type'] ?? '', $nonPostTypes, true)) {
        if (isset($embedRecord['blocked'])) {
            $quotedAuthor = 'blocked user';
        } elseif (isset($embedRecord['notFound'])) {
            // Fixed typo: this text previously read 'deleted psost'.
            $quotedAuthor = 'deleted post';
        } elseif (isset($embedRecord['detached'])) {
            $quotedAuthor = 'detached post';
        } else {
            $quotedAuthor = $this->fallbackAuthor($embedRecord['record']['author'] ?? $embedRecord['author'], 'display');
        }
        $title .= ', quoting ' . $quotedAuthor;
    }

    return $title;
}
/**
 * Look up the DID for a handle via the public resolveHandle endpoint.
 *
 * @param string $handle Handle as entered by the user.
 * @return string The 'did' field of the JSON response.
 */
private function resolveHandle($handle)
{
    $endpoint = sprintf(
        'https://public.api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=%s',
        urlencode($handle)
    );
    $payload = json_decode(getContents($endpoint), true);

    return $payload['did'];
}
/**
 * Fetch the profile of an account from the public getProfile endpoint.
 *
 * @param string $did Account identifier passed as the 'actor' query parameter.
 * @return mixed Decoded JSON response (associative array on success).
 */
private function getProfile($did)
{
    $endpoint = sprintf(
        'https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=%s',
        urlencode($did)
    );

    return json_decode(getContents($endpoint), true);
}
/**
 * Fetch up to 30 entries of an account's feed from the public getAuthorFeed endpoint.
 *
 * @param string $did    Account identifier passed as the 'actor' query parameter.
 * @param string $filter Value for the 'filter' query parameter.
 * @return mixed Decoded JSON response (associative array on success).
 */
private function getAuthorFeed($did, $filter)
{
    $endpoint = sprintf(
        'https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=%s&filter=%s&limit=30',
        urlencode($did),
        urlencode($filter)
    );

    // Log the request URL only when debug mode is on.
    if (Debug::isEnabled()) {
        $this->logger->debug($endpoint);
    }

    return json_decode(getContents($endpoint), true);
}
/**
 * Render an embedded generated feed or list as a blockquote.
 *
 * @param array $record Feed/list view; reads 'uri', 'creator', 'displayName' or 'name',
 *                      and optional 'avatar', 'description', 'likeCount', 'purpose'.
 * @return string HTML fragment
 */
private function getListFeedDescription(array $record): string
{
    // Use the thumbnail variant of the avatar; the URL is escaped because it lands in an attribute.
    $feedViewAvatar = isset($record['avatar'])
        ? '<img src="' . e(preg_replace('/\/img\/avatar\//', '/img/avatar_thumbnail/', $record['avatar'])) . '">'
        : '';
    $feedViewName = e($record['displayName'] ?? $record['name']);
    $feedViewDescription = e($record['description'] ?? '');
    // Display names are treated as optional everywhere else in this class.
    $authorDisplayName = e($record['creator']['displayName'] ?? '');
    $authorHandle = e($record['creator']['handle']);
    $likeCount = isset($record['likeCount']) ? '<br>Liked by ' . e((string) $record['likeCount']) . ' users' : '';

    // The last path segment of the record URI is the feed/list key used in the web URL.
    preg_match('/\/([^\/]+)$/', $record['uri'], $matches);
    $recordKey = $matches[1] ?? '';

    $purpose = $record['purpose'] ?? '';
    if ($purpose === 'app.bsky.graph.defs#modlist') {
        $typeURL = '/lists/';
        $typeDesc = 'moderation list';
    } elseif ($purpose === 'app.bsky.graph.defs#curatelist') {
        $typeURL = '/lists/';
        $typeDesc = 'list';
    } else {
        $typeURL = '/feed/';
        $typeDesc = 'feed';
    }
    $uri = e('https://bsky.app/profile/' . $record['creator']['did'] . $typeURL . $recordKey);

    return <<<END
        <blockquote>
        <b><a href="{$uri}">{$feedViewName}</a></b><br/>
        Bluesky {$typeDesc} by <b>{$authorDisplayName}</b> <i>@{$authorHandle}</i>
        <figure>
        {$feedViewAvatar}
        <figcaption>{$feedViewDescription}{$likeCount}</figcaption>
        </figure>
        </blockquote>
        END;
}
/**
 * Render an embedded starter pack as a blockquote.
 *
 * @param array $record Starter pack view; reads 'record' ('name', 'list', optional
 *                      'description') and 'creator' ('did', 'handle', optional 'displayName').
 * @return string HTML fragment, or a short failure message when 'record' is missing.
 */
private function getStarterPackDescription(array $record): string
{
    if (!isset($record['record'])) {
        return 'Failed to get starter pack information.';
    }

    $starterpackRecord = $record['record'];
    $starterpackName = e($starterpackRecord['name']);
    // Description and display name are guarded like the equivalent fields in getListFeedDescription().
    $starterpackDescription = e($starterpackRecord['description'] ?? '');
    $creatorDisplayName = e($record['creator']['displayName'] ?? '');
    $creatorHandle = e($record['creator']['handle']);

    // The last path segment of the list URI is used as the starter pack key in the web URL.
    preg_match('/\/([^\/]+)$/', $starterpackRecord['list'], $matches);
    $uri = e('https://bsky.app/starter-pack/' . $record['creator']['did'] . '/' . ($matches[1] ?? ''));

    return <<<END
        <blockquote>
        <b><a href="{$uri}">{$starterpackName}</a></b><br/>
        Bluesky starter pack by <b>{$creatorDisplayName}</b> <i>@{$creatorHandle}</i><br/>
        {$starterpackDescription}
        </blockquote>
        END;
}
}

View File

@ -1,218 +0,0 @@
<?php
class BodaccBridge extends BridgeAbstract
{
const NAME = 'BODACC';
const URI = 'https://bodacc-datadila.opendatasoft.com/';
const DESCRIPTION = 'Fetches announces from the French Government "Bulletin Officiel Des Annonces Civiles et Commerciales".';
const CACHE_TIMEOUT = 86400;
const MAINTAINER = 'quent1';
const PARAMETERS = [
'Annonces commerciales' => [
'departement' => [
'name' => 'Département',
'type' => 'list',
'values' => [
'Tous' => null,
'Ain' => '01',
'Aisne' => '02',
'Allier' => '03',
'Alpes-de-Haute-Provence' => '04',
'Hautes-Alpes' => '05',
'Alpes-Maritimes' => '06',
'Ardèche' => '07',
'Ardennes' => '08',
'Ariège' => '09',
'Aube' => '10',
'Aude' => '11',
'Aveyron' => '12',
'Bouches-du-Rhône' => '13',
'Calvados' => '14',
'Cantal' => '15',
'Charente' => '16',
'Charente-Maritime' => '17',
'Cher' => '18',
'Corrèze' => '19',
'Corse-du-Sud' => '2A',
'Haute-Corse' => '2B',
'Côte-d\'Or' => '21',
'Côtes-d\'Armor' => '22',
'Creuse' => '23',
'Dordogne' => '24',
'Doubs' => '25',
'Drôme' => '26',
'Eure' => '27',
'Eure-et-Loir' => '28',
'Finistère' => '29',
'Gard' => '30',
'Haute-Garonne' => '31',
'Gers' => '32',
'Gironde' => '33',
'Hérault' => '34',
'Ille-et-Vilaine' => '35',
'Indre' => '36',
'Indre-et-Loire' => '37',
'Isère' => '38',
'Jura' => '39',
'Landes' => '40',
'Loir-et-Cher' => '41',
'Loire' => '42',
'Haute-Loire' => '43',
'Loire-Atlantique' => '44',
'Loiret' => '45',
'Lot' => '46',
'Lot-et-Garonne' => '47',
'Lozère' => '48',
'Maine-et-Loire' => '49',
'Manche' => '50',
'Marne' => '51',
'Haute-Marne' => '52',
'Mayenne' => '53',
'Meurthe-et-Moselle' => '54',
'Meuse' => '55',
'Morbihan' => '56',
'Moselle' => '57',
'Nièvre' => '58',
'Nord' => '59',
'Oise' => '60',
'Orne' => '61',
'Pas-de-Calais' => '62',
'Puy-de-Dôme' => '63',
'Pyrénées-Atlantiques' => '64',
'Hautes-Pyrénées' => '65',
'Pyrénées-Orientales' => '66',
'Bas-Rhin' => '67',
'Haut-Rhin' => '68',
'Rhône' => '69',
'Haute-Saône' => '70',
'Saône-et-Loire' => '71',
'Sarthe' => '72',
'Savoie' => '73',
'Haute-Savoie' => '74',
'Paris' => '75',
'Seine-Maritime' => '76',
'Seine-et-Marne' => '77',
'Yvelines' => '78',
'Deux-Sèvres' => '79',
'Somme' => '80',
'Tarn' => '81',
'Tarn-et-Garonne' => '82',
'Var' => '83',
'Vaucluse' => '84',
'Vendée' => '85',
'Vienne' => '86',
'Haute-Vienne' => '87',
'Vosges' => '88',
'Yonne' => '89',
'Territoire de Belfort' => '90',
'Essonne' => '91',
'Hauts-de-Seine' => '92',
'Seine-Saint-Denis' => '93',
'Val-de-Marne' => '94',
'Val-d\'Oise' => '95',
'Guadeloupe' => '971',
'Martinique' => '972',
'Guyane' => '973',
'La Réunion' => '974',
'Saint-Pierre-et-Miquelon' => '975',
'Mayotte' => '976',
'Saint-Barthélemy' => '977',
'Saint-Martin' => '978',
'Terres australes et antarctiques françaises' => '984',
'Wallis-et-Futuna' => '986',
'Polynésie française' => '987',
'Nouvelle-Calédonie' => '988',
'Île de Clipperton' => '989'
]
],
'famille' => [
'name' => 'Famille',
'type' => 'list',
'values' => [
'Toutes' => null,
'Annonces diverses' => 'divers',
'Créations' => 'creation',
'Dépôts des comptes' => 'dpc',
'Immatriculations' => 'immatriculation',
'Modifications diverses' => 'modification',
'Procédures collectives' => 'collective',
'Procédures de conciliation' => 'conciliation',
'Procédures de rétablissement professionnel' => 'retablissement_professionnel',
'Radiations' => 'radiation',
'Ventes et cessions' => 'vente'
]
],
'type' => [
'name' => 'Type',
'type' => 'list',
'values' => [
'Tous' => null,
'Avis initial' => 'annonce',
'Avis d\'annulation' => 'annulation',
'Avis rectificatif' => 'rectificatif'
]
]
]
];
/**
 * Query the BODACC open-data API for the 50 most recent announcements matching
 * the selected filters and emit one item per complete record.
 */
public function collectData()
{
    // Dataset field => selected form value; empty selections add no filter.
    $selections = [
        'numerodepartement' => $this->getInput('departement'),
        'familleavis' => $this->getInput('famille'),
        'typeavis' => $this->getInput('type'),
    ];
    $clauses = [];
    foreach ($selections as $field => $value) {
        if (!empty($value)) {
            $clauses[] = $field . '="' . $value . '"';
        }
    }

    $query = [
        'select' => 'id,dateparution,typeavis_lib,familleavis_lib,commercant,ville,cp',
        'order_by' => 'id desc',
        'limit' => 50,
    ];
    if ($clauses !== []) {
        $query['where'] = implode(' and ', $clauses);
    }

    $endpoint = '/api/explore/v2.1/catalog/datasets/annonces-commerciales/records?' . http_build_query($query);
    $response = Json::decode(getContents(urljoin(self::URI, $endpoint)), false);

    foreach ($response->results as $record) {
        // Records missing any field needed for the title are skipped.
        $isComplete = isset(
            $record->id,
            $record->dateparution,
            $record->typeavis_lib,
            $record->familleavis_lib,
            $record->commercant,
            $record->ville,
            $record->cp
        );
        if (!$isComplete) {
            continue;
        }

        $this->items[] = [
            'uid' => $record->id,
            'timestamp' => strtotime($record->dateparution),
            'title' => sprintf(
                '[%s] %s - %s à %s (%s)',
                $record->typeavis_lib,
                $record->familleavis_lib,
                $record->commercant,
                $record->ville,
                $record->cp
            ),
        ];
    }
}
}

View File

@ -1218,15 +1218,14 @@ EOT;
$table = $this->generateEventDetailsTable($event); $table = $this->generateEventDetailsTable($event);
$imgsrc = $event['BannerURL']; $imgsrc = $event['BannerURL'];
$FShareURL = $event['FShareURL'];
return <<<EOT return <<<EOT
<img title="Event Banner URL" src="$imgsrc"> <img title="Event Banner URL" src="$imgsrc"></img>
<br> <br>
$table $table
<br> <br>
More Details are available on the <a href="$FShareURL">BookMyShow website</a>. More Details are available on the <a href="${event['FShareURL']}">BookMyShow website</a>.
EOT; EOT;
} }
/** /**
@ -1293,15 +1292,14 @@ EOT;
$synopsis = preg_replace(self::SYNOPSIS_REGEX, '', $data['EventSynopsis']); $synopsis = preg_replace(self::SYNOPSIS_REGEX, '', $data['EventSynopsis']);
$eventTrailerURL = $data['EventTrailerURL'];
return <<<EOT return <<<EOT
<img title="Movie Poster" src="$imgsrc"></img> <img title="Movie Poster" src="$imgsrc"></img>
<div>$table</div> <div>$table</div>
<p>$innerHtml</p> <p>$innerHtml</p>
<p>$synopsis</p> <p>${synopsis}</p>
More Details are available on the <a href="$url">BookMyShow website</a> and a trailer is available More Details are available on the <a href="$url">BookMyShow website</a> and a trailer is available
<a href="$eventTrailerURL" title="Trailer URL">here</a> <a href="${data['EventTrailerURL']}" title="Trailer URL">here</a>
EOT; EOT;
} }
/** /**

View File

@ -1,63 +0,0 @@
<?php
/**
 * Bridge for the listing pages of bruegel.org.
 *
 * Fetches the listing page of the selected category and turns every
 * article card found in `.c-listing__content` into a feed item.
 */
class BruegelBridge extends BridgeAbstract
{
    const NAME = 'Bruegel';
    const URI = 'https://www.bruegel.org';
    const DESCRIPTION = 'European think-tank commentary and publications.';
    const MAINTAINER = 'KappaPrajd';
    const PARAMETERS = [
        [
            'category' => [
                'name' => 'Category',
                'type' => 'list',
                'defaultValue' => '/publications',
                'values' => [
                    'Publications' => '/publications',
                    'Commentary' => '/commentary'
                ]
            ]
        ]
    ];

    /**
     * @return string Absolute URL of the site icon.
     */
    public function getIcon()
    {
        return self::URI . '/themes/custom/bruegel/assets/favicon/android-icon-72x72.png';
    }

    /**
     * Scrape the selected category listing and append one item per article.
     *
     * Cards without a title link are skipped; a missing description or date
     * no longer aborts the whole feed.
     */
    public function collectData()
    {
        $url = self::URI . $this->getInput('category');
        $html = getSimpleHTMLDOM($url);

        foreach ($html->find('.c-listing__content article') as $article) {
            $link = $article->find('.c-list-item__title a', 0);
            if ($link === null) {
                // Without the title link there is neither a URI nor a title to emit.
                continue;
            }

            $titleElement = $article->find('.c-list-item__title a span', 0);
            $descriptionElement = $article->find('.c-list-item__description', 0);
            $dateElement = $article->find('.c-list-item__date', 0);

            $item = [
                // Fall back to the link text when the inner <span> is absent.
                'title' => $titleElement !== null ? $titleElement->plaintext : trim($link->plaintext),
                'content' => $descriptionElement !== null ? trim($descriptionElement->plaintext) : '',
                'uri' => self::URI . $link->getAttribute('href'),
                'author' => $this->getAuthor($article),
            ];

            // strtotime() returns false for unparsable input; only keep real timestamps.
            $timestamp = $dateElement !== null ? strtotime($dateElement->plaintext) : false;
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }

            $this->items[] = $item;
        }
    }

    /**
     * Build the author string of one article card.
     *
     * @param object $article Article node as returned by find() on the listing page.
     * @return string Comma-separated author names, empty when no author link exists.
     */
    private function getAuthor($article)
    {
        $authorsElements = $article->find('.c-list-item__authors a');

        $authors = array_map(function ($author) {
            return $author->plaintext;
        }, $authorsElements);

        return join(', ', $authors);
    }
}

View File

@ -38,20 +38,50 @@ class BrutBridge extends BridgeAbstract
] ]
]; ];
const CACHE_TIMEOUT = 1800; // 30 mins
private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/';
public function collectData() public function collectData()
{ {
$url = $this->getURI(); $html = getSimpleHTMLDOM($this->getURI());
$html = getSimpleHTMLDOM($url);
$regex = '/window.__PRELOADED_STATE__ = (.*);/'; $results = $html->find('div.results', 0);
preg_match($regex, $html, $parts);
$data = Json::decode($parts[1], false); foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) {
foreach ($data->medias->index as $uid => $media) { $item = [];
$this->items[] = [
'uid' => $uid, $videoPath = self::URI . $li->children(0)->href;
'title' => $media->metadata->slug, $videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600);
'uri' => $media->share_url,
'timestamp' => $media->published_at, $json = $this->extractJson($videoPageHtml);
]; $id = array_keys((array) $json->media->index)[0];
$item['uri'] = $videoPath;
$item['title'] = $json->media->index->$id->title;
$item['timestamp'] = $json->media->index->$id->published_at;
$item['enclosures'][] = $json->media->index->$id->media->thumbnail;
$description = $json->media->index->$id->description;
$article = '';
if (is_null($json->media->index->$id->media->seo_article) === false) {
$article = markdownToHtml($json->media->index->$id->media->seo_article);
}
$item['content'] = <<<EOD
<video controls poster="{$json->media->index->$id->media->thumbnail}" preload="none">
<source src="{$json->media->index->$id->media->mp4_url}" type="video/mp4">
</video>
<p>{$description}</p>
{$article}
EOD;
$this->items[] = $item;
if (count($this->items) >= 10) {
break;
}
} }
} }
@ -60,14 +90,35 @@ class BrutBridge extends BridgeAbstract
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category'); return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category');
} }
return parent::getURI(); return parent::getURI();
} }
public function getName() public function getName()
{ {
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.'; return $this->getKey('category') . ' - ' .
$this->getKey('edition') . ' - Brut.';
} }
return parent::getName(); return parent::getName();
} }
/**
 * Extract the JSON object embedded in a page.
 *
 * The page text is matched against $this->jsonRegex and the first capture
 * group is decoded.
 *
 * @param mixed $html Page content; passed to preg_match() as the subject.
 * @return mixed The decoded JSON value (objects are decoded as stdClass).
 */
private function extractJson($html)
{
    if (!preg_match($this->jsonRegex, $html, $parts)) {
        returnServerError('Failed to extract data from page');
    }

    $data = json_decode($parts[1]);

    // json_decode() reports failure by returning null (never false), so the
    // error state has to be checked to tell a failure from a literal "null".
    if ($data === null && json_last_error() !== JSON_ERROR_NONE) {
        returnServerError('Failed to decode extracted data');
    }

    return $data;
}
} }

View File

@ -159,12 +159,12 @@ class BugzillaBridge extends BridgeAbstract
protected function getUser($user) protected function getUser($user)
{ {
// Check if the user endpoint is available // Check if the user endpoint is available
if ($this->loadCacheValue($this->instance . 'userEndpointClosed')) { if ($this->loadCacheValue($this->instance . 'userEndpointClosed', 86400)) {
return $user; return $user;
} }
$cache = $this->loadCacheValue($this->instance . $user); $cache = $this->loadCacheValue($this->instance . $user);
if ($cache) { if (!is_null($cache)) {
return $cache; return $cache;
} }

View File

@ -206,7 +206,7 @@ class BukowskisBridge extends BridgeAbstract
$this->items[] = [ $this->items[] = [
'title' => $title, 'title' => $title,
'uri' => $baseUrl . $relative_url, 'uri' => $baseUrl . $relative_url,
'uid' => $relative_url, 'uid' => $lot->getAttribute('data-lot-id'),
'content' => count($images) > 0 ? "<img src='$images[0]'/><br/>$title" : $title, 'content' => count($images) > 0 ? "<img src='$images[0]'/><br/>$title" : $title,
'enclosures' => array_slice($images, 1), 'enclosures' => array_slice($images, 1),
]; ];

View File

@ -71,9 +71,7 @@ class BundesbankBridge extends BridgeAbstract
$item['content'] .= '<strong>' . $study->find('.teasable__subtitle', 0)->plaintext . '</strong>'; $item['content'] .= '<strong>' . $study->find('.teasable__subtitle', 0)->plaintext . '</strong>';
} }
$teasable = $study->find('.teasable__text', 0); $item['content'] .= '<p>' . $study->find('.teasable__text', 0)->plaintext . '</p>';
$teasableText = $teasable->plaintext ?? '';
$item['content'] .= '<p>' . $teasableText . '</p>';
$item['timestamp'] = strtotime($study->find('.teasable__date', 0)->plaintext); $item['timestamp'] = strtotime($study->find('.teasable__date', 0)->plaintext);

View File

@ -26,16 +26,18 @@ TMPL;
https://www.bundestag.de/ajax/filterlist/de/parlament/praesidium/parteienfinanzierung/fundstellen50000/462002-462002 https://www.bundestag.de/ajax/filterlist/de/parlament/praesidium/parteienfinanzierung/fundstellen50000/462002-462002
URI; URI;
// Get the main page // Get the main page
$html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT); $html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT)
or returnServerError('Could not request AJAX list.');
// Build the URL from the first anchor element. The list is sorted by year, descending, so the first element is the current year. // Build the URL from the first anchor element. The list is sorted by year, descending, so the first element is the current year.
$firstAnchor = $html->find('a', 0) $firstAnchor = $html->find('a', 0)
or returnServerError('Could not find the proper HTML element.'); or returnServerError('Could not find the proper HTML element.');
$url = $firstAnchor->href; $url = 'https://www.bundestag.de' . $firstAnchor->href;
// Get the actual page with the soft money donations // Get the actual page with the soft money donations
$html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT); $html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT)
or returnServerError('Could not request ' . $url);
$rows = $html->find('table.table > tbody > tr') $rows = $html->find('table.table > tbody > tr')
or returnServerError('Could not find the proper HTML elements.'); or returnServerError('Could not find the proper HTML elements.');

View File

@ -1,28 +0,0 @@
<?php
/**
 * XPath-driven bridge for the news list of bffk.de.
 *
 * All extraction is configured through the XPATH_EXPRESSION_* constants;
 * only the timestamp needs custom parsing (see formatItemTimestamp()).
 */
class BundesverbandFuerFreieKammernBridge extends XPathAbstract
{
    const NAME = 'Bundesverband für freie Kammern e.V.';
    const URI = 'https://www.bffk.de/aktuelles/aktuelle-nachrichten.html';
    const DESCRIPTION = 'Aktuelle Nachrichten';
    const MAINTAINER = 'hleskien';
    const FEED_SOURCE_URL = 'https://www.bffk.de/aktuelles/aktuelle-nachrichten.html';
    //const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href';
    const XPATH_EXPRESSION_ITEM = '//ul[@class="article-list"]/li';
    const XPATH_EXPRESSION_ITEM_TITLE = './/a/text()';
    const XPATH_EXPRESSION_ITEM_CONTENT = './/a/text()';
    const XPATH_EXPRESSION_ITEM_URI = './/a/@href';
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/span/i';
    //const XPATH_EXPRESSION_ITEM_ENCLOSURES = './';
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Convert the date text of a list entry into a Unix timestamp.
     *
     * The value is expected in the form "(dd.mm.yyyy)"; the parentheses and
     * any surrounding whitespace are stripped before parsing.
     *
     * @param string $value Raw text selected by XPATH_EXPRESSION_ITEM_TIMESTAMP.
     * @return int|false Timestamp at 00:00:00 of that day, or the result of
     *                   strtotime() when the text is not in d.m.Y format.
     */
    protected function formatItemTimestamp($value)
    {
        $value = trim($value, "() \t\n\r\0\x0B");

        $dti = DateTimeImmutable::createFromFormat('d.m.Y', $value);
        if ($dti === false) {
            // createFromFormat() returns false on a parse failure; calling
            // setTime() on it would be a fatal error, so fall back to the
            // generic parser instead.
            return strtotime($value);
        }

        // createFromFormat() fills unparsed fields with the current time of
        // day, so pin the result to midnight.
        return $dti->setTime(0, 0, 0)->getTimestamp();
    }
}

View File

@ -1,6 +1,6 @@
<?php <?php
class CNETBridge extends SitemapBridge class CNETBridge extends BridgeAbstract
{ {
const MAINTAINER = 'ORelio'; const MAINTAINER = 'ORelio';
const NAME = 'CNET News'; const NAME = 'CNET News';
@ -14,105 +14,101 @@ class CNETBridge extends SitemapBridge
'type' => 'list', 'type' => 'list',
'values' => [ 'values' => [
'All articles' => '', 'All articles' => '',
'Tech' => 'tech', 'Apple' => 'apple',
'Money' => 'personal-finance', 'Google' => 'google',
'Home' => 'home', 'Microsoft' => 'tags-microsoft',
'Wellness' => 'health', 'Computers' => 'topics-computers',
'Energy' => 'home/energy-and-utilities', 'Mobile' => 'topics-mobile',
'Deals' => 'deals', 'Sci-Tech' => 'topics-sci-tech',
'Computing' => 'tech/computing', 'Security' => 'topics-security',
'Mobile' => 'tech/mobile', 'Internet' => 'topics-internet',
'Science' => 'science', 'Tech Industry' => 'topics-tech-industry'
'Services' => 'tech/services-and-software' ]
] ]
],
'limit' => self::LIMIT
] ]
]; ];
/**
 * Trim and clean up the HTML of an article body.
 *
 * Everything before the first "<p>" or "<figure" (whichever comes first) is
 * dropped, site-relative links are made absolute, and player/gallery chrome,
 * scripts and inline SVGs are stripped.
 *
 * @param string $article_html Raw article markup.
 * @return string Cleaned markup.
 */
private function cleanArticle($article_html)
{
    // strpos() returns false when the needle is missing. Comparing false with
    // "<" picked offset 0 whenever only one of the two markers was present,
    // so keep only the real offsets and start at the smallest one.
    $offsets = array_filter(
        [strpos($article_html, '<p>'), strpos($article_html, '<figure')],
        'is_int'
    );
    if ($offsets !== []) {
        $article_html = substr($article_html, min($offsets));
    }

    $article_html = str_replace('href="/', 'href="' . self::URI, $article_html);
    $article_html = str_replace([' height="0"', '<noscript>', '</noscript>'], '', $article_html);

    $article_html = stripWithDelimiters($article_html, '<a class="clickToEnlarge', '</a>');
    $article_html = stripWithDelimiters($article_html, '<span class="nowPlaying', '</span>');
    $article_html = stripWithDelimiters($article_html, '<span class="duration', '</span>');
    $article_html = stripWithDelimiters($article_html, '<script', '</script>');
    $article_html = stripWithDelimiters($article_html, '<svg', '</svg>');

    return $article_html;
}
public function collectData() public function collectData()
{ {
$topic = $this->getInput('topic'); // Retrieve and check user input
$limit = $this->getInput('limit'); $topic = str_replace('-', '/', $this->getInput('topic'));
$limit = empty($limit) ? 10 : $limit; if (!empty($topic) && (substr_count($topic, '/') > 1 || !ctype_alpha(str_replace('/', '', $topic)))) {
returnClientError('Invalid topic: ' . $topic);
}
$url_pattern = empty($topic) ? '' : self::URI . $topic; // Retrieve webpage
$sitemap_latest = self::URI . 'sitemaps/article/' . date('Y/m') . '.xml'; $pageUrl = self::URI . (empty($topic) ? 'news/' : $topic . '/');
$sitemap_previous = self::URI . 'sitemaps/article/' . date('Y/m', strtotime('last day of previous month')) . '.xml'; $html = getSimpleHTMLDOM($pageUrl);
$links = array_merge( // Process articles
$this->sitemapXmlToList($this->getSitemapXml($sitemap_latest, true), $url_pattern, $limit), foreach ($html->find('div.assetBody, div.riverPost') as $element) {
$this->sitemapXmlToList($this->getSitemapXml($sitemap_previous, true), $url_pattern, $limit) if (count($this->items) >= 10) {
break;
}
$article_title = trim($element->find('h2, h3', 0)->plaintext);
$article_uri = self::URI . substr($element->find('a', 0)->href, 1);
$article_thumbnail = $element->parent()->find('img[src]', 0)->src;
$article_timestamp = strtotime($element->find('time.assetTime, div.timeAgo', 0)->plaintext);
$article_author = trim($element->find('a[rel=author], a.name', 0)->plaintext);
$article_content = '<p><b>' . trim($element->find('p.dek', 0)->plaintext) . '</b></p>';
if (is_null($article_thumbnail)) {
$article_thumbnail = extractFromDelimiters($element->innertext, '<img src="', '"');
}
if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, self::URI . 'news/') !== false) {
$article_html = getSimpleHTMLDOMCached($article_uri) or $article_html = null;
if (!is_null($article_html)) {
if (empty($article_thumbnail)) {
$article_thumbnail = $article_html->find('div.originalImage', 0);
}
if (empty($article_thumbnail)) {
$article_thumbnail = $article_html->find('span.imageContainer', 0);
}
if (is_object($article_thumbnail)) {
$article_thumbnail = $article_thumbnail->find('img', 0)->src;
}
$article_content .= trim(
$this->cleanArticle(
extractFromDelimiters(
$article_html,
'<article',
'<footer'
)
)
); );
if ($limit > 0 && count($links) > $limit) {
$links = array_slice($links, 0, $limit);
}
if (empty($links)) {
returnClientError('Failed to retrieve article list');
}
foreach ($links as $article_uri) {
$article_dom = convertLazyLoading(getSimpleHTMLDOMCached($article_uri));
$title = trim($article_dom->find('h1', 0)->plaintext);
$author = $article_dom->find('span.c-assetAuthor_name', 0);
$headline = $article_dom->find('p.c-contentHeader_description', 0);
$content = $article_dom->find('div.c-pageArticle_content, div.single-article__content, div.article-main-body', 0);
$date = null;
$enclosure = null;
foreach ($article_dom->find('script[type=application/ld+json]') as $ldjson) {
$datePublished = extractFromDelimiters($ldjson->innertext, '"datePublished":"', '"');
if ($datePublished !== false) {
$date = strtotime($datePublished);
}
$imageObject = extractFromDelimiters($ldjson->innertext, 'ImageObject","url":"', '"');
if ($imageObject !== false) {
$enclosure = $imageObject;
}
}
foreach ($content->find('div.c-shortcodeGallery') as $cleanup) {
$cleanup->outertext = '';
}
foreach ($content->find('figure') as $figure) {
$img = $figure->find('img', 0);
if ($img) {
$figure->outertext = $img->outertext;
}
}
$content = $content->innertext;
if ($enclosure) {
$content = "<div><img src=\"$enclosure\" /></div>" . $content;
}
if ($headline) {
$content = '<p><b>' . $headline->plaintext . '</b></p><br />' . $content;
} }
$item = []; $item = [];
$item['uri'] = $article_uri; $item['uri'] = $article_uri;
$item['title'] = $title; $item['title'] = $article_title;
$item['author'] = $article_author;
if ($author) { $item['timestamp'] = $article_timestamp;
$item['author'] = $author->plaintext; $item['enclosures'] = [$article_thumbnail];
} $item['content'] = $article_content;
$item['content'] = $content;
if (!is_null($date)) {
$item['timestamp'] = $date;
}
if (!is_null($enclosure)) {
$item['enclosures'] = [$enclosure];
}
$this->items[] = $item; $this->items[] = $item;
} }
} }
}
} }

View File

@ -43,8 +43,10 @@ class CNETFranceBridge extends FeedExpander
$this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/'); $this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/');
} }
protected function parseItem(array $item) protected function parseItem($feedItem)
{ {
$item = parent::parseItem($feedItem);
foreach ($this->bannedTitle as $term) { foreach ($this->bannedTitle as $term) {
if (preg_match('/' . $term . '/mi', $item['title']) === 1) { if (preg_match('/' . $term . '/mi', $item['title']) === 1) {
return null; return null;
@ -52,7 +54,7 @@ class CNETFranceBridge extends FeedExpander
} }
foreach ($this->bannedURL as $term) { foreach ($this->bannedURL as $term) {
if (preg_match('#' . $term . '#mi', $item['uri'])) { if (preg_match('/' . $term . '/mi', $item['uri']) === 1) {
return null; return null;
} }
} }

View File

@ -36,43 +36,12 @@ class CVEDetailsBridge extends BridgeAbstract
private $vendor = ''; private $vendor = '';
private $product = ''; private $product = '';
/**
 * Turn the rows of the search result page into feed items.
 *
 * The page is fetched through fetchContent() on first use. Rows without a
 * CVE link are skipped, and at most 30 items are collected.
 */
public function collectData()
{
    if ($this->html == null) {
        $this->fetchContent();
    }

    foreach ($this->html->find('#searchresults > div > div.row') as $row) {
        // The heading link carries both the CVE id (title) and the detail URL.
        $link = $row->find('h3 > a', 0);
        if ($link === null) {
            // No link means no title and no URI: nothing useful to emit.
            continue;
        }

        $title = $link->innertext;
        $content = $row->find('.cvesummarylong', 0)->innertext ?? '';
        $timestamp = $row->find('[data-tsvfield="publishDate"]', 0)->innertext ?? 0;

        $this->items[] = [
            'uri' => $link->href,
            'title' => $title,
            'timestamp' => $timestamp,
            'content' => $content,
            'categories' => [$this->vendor],
            'enclosures' => [],
            'uid' => $title,
        ];

        if (count($this->items) >= 30) {
            break;
        }
    }
}
// Make the actual request to cvedetails.com and stores the response
// (HTML) for later use and extract vendor and product from it.
private function fetchContent()
{
// build url
// Return the URL to query. // Return the URL to query.
// Because of the optional product ID, we need to attach it if it is // Because of the optional product ID, we need to attach it if it is
// set. The search result page has the exact same structure (with and // set. The search result page has the exact same structure (with and
// without the product ID). // without the product ID).
private function buildUrl()
{
$url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id'); $url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id');
if ($this->getInput('product_id') !== '') { if ($this->getInput('product_id') !== '') {
$url .= '/product_id-' . $this->getInput('product_id'); $url .= '/product_id-' . $this->getInput('product_id');
@ -82,21 +51,32 @@ class CVEDetailsBridge extends BridgeAbstract
// number, which should be mostly accurate. // number, which should be mostly accurate.
$url .= '?order=1'; // Order by CVE number DESC $url .= '?order=1'; // Order by CVE number DESC
$html = getSimpleHTMLDOM($url); return $url;
}
// Make the actual request to cvedetails.com and stores the response
// (HTML) for later use and extract vendor and product from it.
private function fetchContent()
{
$html = getSimpleHTMLDOM($this->buildUrl());
$this->html = defaultLinkTo($html, self::URI); $this->html = defaultLinkTo($html, self::URI);
$vendor = $html->find('#contentdiv h1 > a', 0); $vendor = $html->find('#contentdiv > h1 > a', 0);
if ($vendor == null) { if ($vendor == null) {
returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . $this->getInput('product_id')); returnServerError('Invalid Vendor ID ' .
$this->getInput('vendor_id') .
' or Product ID ' .
$this->getInput('product_id'));
} }
$this->vendor = $vendor->innertext; $this->vendor = $vendor->innertext;
$product = $html->find('#contentdiv h1 > a', 1); $product = $html->find('#contentdiv > h1 > a', 1);
if ($product != null) { if ($product != null) {
$this->product = $product->innertext; $this->product = $product->innertext;
} }
} }
// Build the name of the feed.
public function getName() public function getName()
{ {
if ($this->getInput('vendor_id') == '') { if ($this->getInput('vendor_id') == '') {
@ -114,4 +94,52 @@ class CVEDetailsBridge extends BridgeAbstract
return $name; return $name;
} }
/**
 * Read the vulnerability table of the fetched page and fill $this->items.
 *
 * Each item is categorised with the vendor, the CWE number (when linked),
 * the vulnerability type (when not blank) and the product (when known).
 * Collection stops after 10 items.
 */
public function collectData()
{
    if ($this->html == null) {
        $this->fetchContent();
    }

    foreach ($this->html->find('#vulnslisttable .srrowns') as $row) {
        $tags = [$this->vendor];
        $links = [];

        // CWE column: optional link whose text is the CWE number.
        $cweLink = $row->find('td', 2)->find('a', 0);
        if ($cweLink != null) {
            $cweNumber = $cweLink->innertext;
            $tags[] = 'CWE-' . $cweNumber;
            $links[] = 'https://cwe.mitre.org/data/definitions/' . $cweNumber . '.html';
        }

        // Vulnerability type column: optional free text.
        $vulnerabilityType = $row->find('td', 4)->innertext;
        if (trim($vulnerabilityType) != '') {
            $tags[] = $vulnerabilityType;
        }

        if ($this->product != '') {
            $tags[] = $this->product;
        }

        // The CVE number doubles as title and uid; its link is the item URI.
        $cveLink = $row->find('td', 1)->find('a', 0);
        $cveNumber = $cveLink->innertext;

        $this->items[] = [
            'uri' => $cveLink->href,
            'title' => $cveNumber,
            'timestamp' => $row->find('td', 5)->innertext,
            // The summary is read from the row following the data row.
            'content' => $row->next_sibling()->innertext,
            'categories' => $tags,
            'enclosures' => $links,
            'uid' => $cveNumber,
        ];

        // Only the latest 10 CVEs are wanted.
        if (count($this->items) >= 10) {
            break;
        }
    }
}
} }

View File

@ -1,118 +1,44 @@
<?php <?php
class CarThrottleBridge extends BridgeAbstract class CarThrottleBridge extends FeedExpander
{ {
const NAME = 'Car Throttle'; const NAME = 'Car Throttle ';
const URI = 'https://www.carthrottle.com/'; const URI = 'https://www.carthrottle.com';
const DESCRIPTION = 'Get the latest car-related news from Car Throttle.'; const DESCRIPTION = 'Get the latest car-related news from Car Throttle.';
const MAINTAINER = 't0stiman'; const MAINTAINER = 't0stiman';
const DONATION_URI = 'https://ko-fi.com/tostiman';
const PARAMETERS = [
'Show articles from these categories:' => [
'news' => [
'name' => 'news',
'type' => 'checkbox'
],
'reviews' => [
'name' => 'reviews',
'type' => 'checkbox'
],
'features' => [
'name' => 'features',
'type' => 'checkbox'
],
'videos' => [
'name' => 'videos',
'type' => 'checkbox'
],
'gaming' => [
'name' => 'gaming',
'type' => 'checkbox'
]
]
];
public function collectData() public function collectData()
{ {
$this->items = []; $this->collectExpandableDatas('https://www.carthrottle.com/rss', 10);
$this->handleCategory('news');
$this->handleCategory('reviews');
$this->handleCategory('features');
$this->handleCategory2('videos', 'video');
$this->handleCategory('gaming');
} }
private function handleCategory($category) protected function parseItem($feedItem)
{ {
if ($this->getInput($category)) { $item = parent::parseItem($feedItem);
$this->getArticles($category);
//fetch page
$articlePage = getSimpleHTMLDOMCached($feedItem->link)
or returnServerError('Could not retrieve ' . $feedItem->link);
$subtitle = $articlePage->find('p.standfirst', 0);
$article = $articlePage->find('div.content_field', 0);
$item['content'] = str_get_html($subtitle . $article);
//convert <iframe>s to <a>s. meant for embedded videos.
foreach ($item['content']->find('iframe') as $found) {
$iframeUrl = $found->getAttribute('src');
if ($iframeUrl) {
$found->outertext = '<a href="' . $iframeUrl . '">' . $iframeUrl . '</a>';
} }
} }
private function handleCategory2($categoryParameter, $categoryURLname) //remove scripts from the text
{ foreach ($item['content']->find('script') as $remove) {
if ($this->getInput($categoryParameter)) { $remove->outertext = '';
$this->getArticles($categoryURLname);
}
} }
private function getArticles($category) return $item;
{
$categoryPage = getSimpleHTMLDOMCached(self::URI . $category);
//for each post
foreach ($categoryPage->find('div.cmg-card') as $post) {
$item = [];
$titleElement = $post->find('a.title')[0];
$post_uri = self::URI . $titleElement->getAttribute('href');
if (!isset($post_uri) || $post_uri == '') {
continue;
}
$item['uri'] = $post_uri;
$item['title'] = $titleElement->innertext;
$articlePage = getSimpleHTMLDOMCached($item['uri']);
$item['author'] = $this->parseAuthor($articlePage);
$articleImage = $articlePage->find('figure')[0];
$article = $articlePage->find('div.first-column div.body')[0];
//remove ads
foreach ($article->find('aside') as $ad) {
$ad->outertext = '';
}
$summary = $articlePage->find('div.summary')[0];
//these are supposed to be hidden
foreach ($article->find('.visually-hidden') as $found) {
$found->outertext = '';
}
$item['content'] = $summary . $articleImage . $article;
array_push($this->items, $item);
}
}
private function parseAuthor($articlePage)
{
$authorDivs = $articlePage->find('div address');
if (!$authorDivs) {
return '';
}
$a = $authorDivs[0]->find('a')[0];
if ($a) {
return $a->innertext;
}
return $authorDivs[0]->innertext;
} }
} }

View File

@ -34,8 +34,10 @@ class CaschyBridge extends FeedExpander
); );
} }
protected function parseItem(array $item) protected function parseItem($feedItem)
{ {
$item = parent::parseItem($feedItem);
if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) { if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) {
return $item; return $item;
} }
@ -54,7 +56,7 @@ class CaschyBridge extends FeedExpander
{ {
// remove unwanted stuff // remove unwanted stuff
foreach ( foreach (
$article->find('div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content, $article->find('div.video-container, div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content,
div.wp-embed, p.wp-caption-text, script') as $element div.wp-embed, p.wp-caption-text, script') as $element
) { ) {
$element->remove(); $element->remove();

View File

@ -1,266 +0,0 @@
<?php
class CentreFranceBridge extends BridgeAbstract
{
const NAME = 'Centre France Newspapers';
const URI = 'https://www.centrefrance.com/';
const DESCRIPTION = 'Common bridge for all Centre France group newspapers.';
const CACHE_TIMEOUT = 7200; // 2h
const MAINTAINER = 'quent1';
const PARAMETERS = [
'global' => [
'newspaper' => [
'name' => 'Newspaper',
'type' => 'list',
'values' => [
'La Montagne' => 'lamontagne.fr',
'Le Populaire du Centre' => 'lepopulaire.fr',
'La République du Centre' => 'larep.fr',
'Le Berry Républicain' => 'leberry.fr',
'L\'Yonne Républicaine' => 'lyonne.fr',
'L\'Écho Républicain' => 'lechorepublicain.fr',
'Le Journal du Centre' => 'lejdc.fr',
'L\'Éveil de la Haute-Loire' => 'leveil.fr',
'Le Pays' => 'le-pays.fr'
]
],
'remove-reserved-for-subscribers-articles' => [
'name' => 'Remove reserved for subscribers articles',
'type' => 'checkbox',
'title' => 'Filter out articles that are only available to subscribers'
],
'limit' => [
'name' => 'Limit',
'type' => 'number',
'title' => 'How many articles to fetch. 0 to disable.',
'required' => true,
'defaultValue' => 15
]
],
'Local news' => [
'locality-slug' => [
'name' => 'Locality slug',
'type' => 'text',
'required' => false,
'title' => 'Fetch articles for a specific locality. If not set, headlines from the front page will be used instead.',
'exampleValue' => 'moulins-03000'
],
]
];
private static array $monthNumberByFrenchName = [
'janvier' => 1, 'février' => 2, 'mars' => 3, 'avril' => 4, 'mai' => 5, 'juin' => 6, 'juillet' => 7,
'août' => 8, 'septembre' => 9, 'octobre' => 10, 'novembre' => 11, 'décembre' => 12
];
public function collectData()
{
$value = $this->getInput('limit');
if (is_numeric($value) && (int)$value >= 0) {
$limit = $value;
} else {
$limit = static::PARAMETERS['global']['limit']['defaultValue'];
}
if (empty($this->getInput('newspaper'))) {
return;
}
$localitySlug = $this->getInput('locality-slug') ?? '';
$alreadyFoundArticlesURIs = [];
$newspaperUrl = 'https://www.' . $this->getInput('newspaper') . '/' . $localitySlug . '/';
$html = getSimpleHTMLDOM($newspaperUrl);
// Articles are detected through their titles
foreach ($html->find('.c-titre') as $articleTitleDOMElement) {
$articleLinkDOMElement = $articleTitleDOMElement->find('a', 0);
// Ignore articles in the « Les + partagés » block
if (strpos($articleLinkDOMElement->id, 'les_plus_partages') !== false) {
continue;
}
$articleURI = $articleLinkDOMElement->href;
// If the URI has already been processed, ignore it
if (in_array($articleURI, $alreadyFoundArticlesURIs, true)) {
continue;
}
// If news are filtered for a specific locality, filter out article for other localities
if ($localitySlug !== '' && !str_contains($articleURI, $localitySlug)) {
continue;
}
$articleTitle = '';
// If article is reserved for subscribers
if ($articleLinkDOMElement->find('span.premium-picto', 0)) {
if ($this->getInput('remove-reserved-for-subscribers-articles') === true) {
continue;
}
$articleTitle .= '🔒 ';
}
$articleTitleDOMElement = $articleLinkDOMElement->find('span[data-tb-title]', 0);
if ($articleTitleDOMElement === null) {
continue;
}
if ($limit > 0 && count($this->items) === $limit) {
break;
}
$articleTitle .= $articleLinkDOMElement->find('span[data-tb-title]', 0)->innertext;
$articleFullURI = urljoin('https://www.' . $this->getInput('newspaper') . '/', $articleURI);
$item = [
'title' => $articleTitle,
'uri' => $articleFullURI,
...$this->collectArticleData($articleFullURI)
];
$this->items[] = $item;
$alreadyFoundArticlesURIs[] = $articleURI;
}
}
private function collectArticleData($uri): array
{
$html = getSimpleHTMLDOMCached($uri, 86400 * 90); // 90d
$item = [
'enclosures' => [],
];
$articleInformations = $html->find('#content hgroup > div.typo-p3 > *');
if (is_array($articleInformations) && $articleInformations !== []) {
$publicationDateIndex = 0;
// Article author
$probableAuthorName = strip_tags($articleInformations[0]->innertext);
if (str_starts_with($probableAuthorName, 'Par ')) {
$publicationDateIndex = 1;
$item['author'] = substr($probableAuthorName, 4);
}
// Article publication date
preg_match('/Publié le (\d{2}) (.+) (\d{4})( à (\d{2})h(\d{2}))?/', strip_tags($articleInformations[$publicationDateIndex]->innertext), $articleDateParts);
if ($articleDateParts !== [] && array_key_exists($articleDateParts[2], self::$monthNumberByFrenchName)) {
$articleDate = new \DateTime('midnight');
$articleDate->setDate($articleDateParts[3], self::$monthNumberByFrenchName[$articleDateParts[2]], $articleDateParts[1]);
if (count($articleDateParts) === 7) {
$articleDate->setTime($articleDateParts[5], $articleDateParts[6]);
}
$item['timestamp'] = $articleDate->getTimestamp();
}
}
$articleContent = $html->find('#content>div.flex+div.grid section>.z-10')[0] ?? null;
if ($articleContent instanceof \simple_html_dom_node) {
$articleHiddenParts = $articleContent->find('.ad-slot, #cf-digiteka-player');
if (is_array($articleHiddenParts)) {
foreach ($articleHiddenParts as $articleHiddenPart) {
$articleContent->removeChild($articleHiddenPart);
}
}
$item['content'] = $articleContent->innertext;
}
$articleIllustration = $html->find('#content>div.flex+div.grid section>figure>img');
if (is_array($articleIllustration) && count($articleIllustration) === 1) {
$item['enclosures'][] = $articleIllustration[0]->getAttribute('src');
}
$articleAudio = $html->find('audio[src^="https://api.octopus.saooti.com/"]');
if (is_array($articleAudio) && count($articleAudio) === 1) {
$item['enclosures'][] = $articleAudio[0]->getAttribute('src');
}
$articleTags = $html->find('#content>div.flex+div.grid section>.bg-gray-light>a.border-gray-dark');
if (is_array($articleTags)) {
$item['categories'] = array_map(static fn ($articleTag) => $articleTag->innertext, $articleTags);
}
$explode = explode('_', $uri);
$array_reverse = array_reverse($explode);
$string = $array_reverse[0];
$uid = rtrim($string, '/');
if (is_numeric($uid)) {
$item['uid'] = $uid;
}
// If the article is a "grand format", we use another parsing strategy
if ($item['content'] === '' && $html->find('article') !== []) {
$articleContent = $html->find('article > section');
foreach ($articleContent as $contentPart) {
if ($contentPart->find('#journo') !== []) {
$item['author'] = $contentPart->find('#journo')->innertext;
continue;
}
$item['content'] .= $contentPart->innertext;
}
}
$item['content'] = str_replace('<span class="p-premium">premium</span>', '🔒', $item['content']);
$item['content'] = trim($item['content']);
return $item;
}
public function getName()
{
if (empty($this->getInput('newspaper'))) {
return static::NAME;
}
$newspaperNameByDomain = array_flip(self::PARAMETERS['global']['newspaper']['values']);
if (!isset($newspaperNameByDomain[$this->getInput('newspaper')])) {
return static::NAME;
}
$completeTitle = $newspaperNameByDomain[$this->getInput('newspaper')];
if (!empty($this->getInput('locality-slug'))) {
$localityName = explode('-', $this->getInput('locality-slug'));
array_pop($localityName);
$completeTitle .= ' ' . ucfirst(implode('-', $localityName));
}
return $completeTitle;
}
public function getIcon()
{
if (empty($this->getInput('newspaper'))) {
return static::URI . '/favicon.ico';
}
return 'https://www.' . $this->getInput('newspaper') . '/favicon.ico';
}
/**
 * Derive bridge parameters from a newspaper URL.
 *
 * The URL is lower-cased and matched against an optional scheme, an optional
 * "www." prefix, a ".fr" domain and an optional locality slug ending in five
 * digits. The domain must be one of the newspaper values declared in
 * PARAMETERS.
 *
 * @param string $url URL to inspect
 * @return array|null 'newspaper' and 'locality-slug' values, or null when the
 *                    URL is not recognised
 */
public function detectParameters($url)
{
    $regex = '/^(https?:\/\/)?(www\.)?([a-z-]+\.fr)(\/)?([a-z-]+-[0-9]{5})?(\/)?$/';
    $url = strtolower($url);

    // preg_match() yields 0 for "no match" and false on error; only 1 guarantees
    // that $urlMatches[3] exists.
    if (preg_match($regex, $url, $urlMatches) !== 1) {
        return null;
    }

    if (!in_array($urlMatches[3], self::PARAMETERS['global']['newspaper']['values'], true)) {
        return null;
    }

    return [
        'newspaper' => $urlMatches[3],
        // Trailing unmatched groups are absent from $urlMatches; empty() tolerates that.
        'locality-slug' => empty($urlMatches[5]) ? null : $urlMatches[5]
    ];
}
}

View File

@ -18,6 +18,25 @@ class CeskaTelevizeBridge extends BridgeAbstract
] ]
]; ];
private function fixChars($text)
{
return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
}
private function getUploadTimeFromString($string)
{
if (strpos($string, 'dnes') !== false) {
return strtotime('today');
} elseif (strpos($string, 'včera') !== false) {
return strtotime('yesterday');
} elseif (!preg_match('/(\d+).\s(\d+).(\s(\d+))?/', $string, $match)) {
returnServerError('Could not get date from Česká televize string');
}
$date = sprintf('%04d-%02d-%02d', $match[3] ?? date('Y'), $match[2], $match[1]);
return strtotime($date);
}
public function collectData() public function collectData()
{ {
$url = $this->getInput('url'); $url = $this->getInput('url');
@ -38,43 +57,25 @@ class CeskaTelevizeBridge extends BridgeAbstract
$this->feedName .= " ({$category})"; $this->feedName .= " ({$category})";
} }
foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) { foreach ($html->find('#episodeListSection a[data-testid=next-link]') as $element) {
$itemContent = $element->find('p[class^=content-]', 0); $itemTitle = $element->find('h3', 0);
$itemDate = $element->find('div[class^=playTime-] span, [data-testid=episode-item-broadcast] span', 0); $itemContent = $element->find('div[class^=content-]', 0);
$itemDate = $element->find('div[class^=playTime-] span', 0);
// Remove special characters and whitespace $itemThumbnail = $element->find('img', 0);
$cleanDate = preg_replace('/[^0-9.]/', '', $itemDate->plaintext); $itemUri = self::URI . $element->getAttribute('href');
$item = [ $item = [
'title' => $this->fixChars($element->find('h3', 0)->plaintext), 'title' => $this->fixChars($itemTitle->plaintext),
'uri' => self::URI . $element->getAttribute('href'), 'uri' => $itemUri,
'content' => '<img src="' . $element->find('img', 0)->getAttribute('srcset') . '" /><br />' . $this->fixChars($itemContent->plaintext), 'content' => '<img src="' . $itemThumbnail->getAttribute('src') . '" /><br />'
'timestamp' => $this->getUploadTimeFromString($cleanDate), . $this->fixChars($itemContent->plaintext),
'timestamp' => $this->getUploadTimeFromString($itemDate->plaintext)
]; ];
$this->items[] = $item; $this->items[] = $item;
} }
} }
private function getUploadTimeFromString($string)
{
if (strpos($string, 'dnes') !== false) {
return strtotime('today');
} elseif (strpos($string, 'včera') !== false) {
return strtotime('yesterday');
} elseif (!preg_match('/(\d+).(\d+).((\d+))?/', $string, $match)) {
returnServerError('Could not get date from Česká televize string');
}
$date = sprintf('%04d-%02d-%02d', $match[3] ?? date('Y'), $match[2], $match[1]);
return strtotime($date);
}
private function fixChars($text)
{
return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
}
public function getURI() public function getURI()
{ {
return $this->feedUri ?? parent::getURI(); return $this->feedUri ?? parent::getURI();

View File

@ -79,9 +79,9 @@ class CodebergBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$url = $this->getURI(); $html = getSimpleHTMLDOM($this->getURI());
$html = getSimpleHTMLDOM($url);
$html = defaultLinkTo($html, $url); $html = defaultLinkTo($html, $this->getURI());
switch ($this->queriedContext) { switch ($this->queriedContext) {
case 'Commits': case 'Commits':
@ -181,12 +181,7 @@ class CodebergBridge extends BridgeAbstract
$item['title'] = $message->find('span.message-wrapper', 0)->plaintext; $item['title'] = $message->find('span.message-wrapper', 0)->plaintext;
$item['uri'] = $tr->find('td.sha', 0)->find('a', 0)->href; $item['uri'] = $tr->find('td.sha', 0)->find('a', 0)->href;
$item['author'] = $tr->find('td.author', 0)->plaintext; $item['author'] = $tr->find('td.author', 0)->plaintext;
$item['timestamp'] = $tr->find('td', 3)->find('span', 0)->title;
$var = $tr->find('td', 3);
$var1 = $var->find('span', 0);
if ($var1) {
$item['timestamp'] = $var1->title;
}
if ($message->find('pre.commit-body', 0)) { if ($message->find('pre.commit-body', 0)) {
$message->find('pre.commit-body', 0)->style = ''; $message->find('pre.commit-body', 0)->style = '';
@ -205,22 +200,17 @@ class CodebergBridge extends BridgeAbstract
*/ */
private function extractIssues($html) private function extractIssues($html)
{ {
$issueList = $html->find('div#issue-list', 0); $div = $html->find('div.issue.list', 0);
foreach ($issueList->find('div.flex-item') as $div) { foreach ($div->find('li.item') as $li) {
$item = []; $item = [];
$number = trim($div->find('a.index,ml-0.mr-2', 0)->plaintext); $number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext);
$item['title'] = $div->find('a.issue-title', 0)->plaintext . ' (' . $number . ')'; $item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
$item['uri'] = $div->find('a.issue-title', 0)->href; $item['uri'] = $li->find('a.title', 0)->href;
$item['timestamp'] = $li->find('span.time-since', 0)->title;
$time = $div->find('relative-time.time-since', 0); $item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
if ($time) {
$item['timestamp'] = $time->datetime;
}
//$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
// Fetch issue page // Fetch issue page
$issuePage = getSimpleHTMLDOMCached($item['uri'], 3600); $issuePage = getSimpleHTMLDOMCached($item['uri'], 3600);
@ -228,7 +218,7 @@ class CodebergBridge extends BridgeAbstract
$item['content'] = $issuePage->find('div.timeline-item.comment.first', 0)->find('div.render-content.markup', 0); $item['content'] = $issuePage->find('div.timeline-item.comment.first', 0)->find('div.render-content.markup', 0);
foreach ($div->find('a.ui.label') as $label) { foreach ($li->find('a.ui.label') as $label) {
$item['categories'][] = $label->plaintext; $item['categories'][] = $label->plaintext;
} }
@ -260,11 +250,7 @@ class CodebergBridge extends BridgeAbstract
} }
$item['author'] = $div->find('a.author', 0)->innertext; $item['author'] = $div->find('a.author', 0)->innertext;
$item['timestamp'] = $div->find('span.time-since', 0)->title;
$timeSince = $div->find('span.time-since', 0);
if ($timeSince) {
$item['timestamp'] = $timeSince->title;
}
$this->items[] = $item; $this->items[] = $item;
} }
@ -275,36 +261,23 @@ class CodebergBridge extends BridgeAbstract
*/ */
private function extractPulls($html) private function extractPulls($html)
{ {
$div = $html->find('div#issue-list', 0); $div = $html->find('div.issue.list', 0);
$var2 = $div->find('div.flex-item'); foreach ($div->find('li.item') as $li) {
foreach ($var2 as $li) {
$item = []; $item = [];
$number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext); $number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext);
$a = $li->find('a.issue-title', 0); $item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
$item['title'] = $a->plaintext . ' (' . $number . ')'; $item['uri'] = $li->find('a.title', 0)->href;
$item['uri'] = $a->href; $item['timestamp'] = $li->find('span.time-since', 0)->title;
$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
$time = $li->find('relative-time.time-since', 0);
if ($time) {
$item['timestamp'] = $time->datetime;
}
// Extracting the author is a bit awkward after they changed their html
//$desc = $li->find('div.desc', 0);
//$item['author'] = $desc->find('a', 1)->plaintext;
// Fetch pull request page // Fetch pull request page
$pullRequestPage = getSimpleHTMLDOMCached($item['uri'], 3600); $pullRequestPage = getSimpleHTMLDOMCached($item['uri'], 3600);
$pullRequestPage = defaultLinkTo($pullRequestPage, self::URI); $pullRequestPage = defaultLinkTo($pullRequestPage, self::URI);
$var = $pullRequestPage->find('ui.timeline', 0); $item['content'] = $pullRequestPage->find('ui.timeline', 0)->find('div.render-content.markup', 0);
if ($var) {
$var1 = $var->find('div.render-content.markup', 0);
$item['content'] = $var1;
}
foreach ($li->find('a.ui.label') as $label) { foreach ($li->find('a.ui.label') as $label) {
$item['categories'][] = $label->plaintext; $item['categories'][] = $label->plaintext;
@ -407,9 +380,6 @@ EOD;
*/ */
private function stripSvg($html) private function stripSvg($html)
{ {
if ($html === null) {
return null;
}
if ($html->find('svg', 0)) { if ($html->find('svg', 0)) {
$html->find('svg', 0)->outertext = ''; $html->find('svg', 0)->outertext = '';
} }

View File

@ -2,65 +2,59 @@
class ComicsKingdomBridge extends BridgeAbstract class ComicsKingdomBridge extends BridgeAbstract
{ {
const MAINTAINER = 'TReKiE'; const MAINTAINER = 'stjohnjohnson';
// const MAINTAINER = 'stjohnjohnson';
const NAME = 'Comics Kingdom Unofficial RSS'; const NAME = 'Comics Kingdom Unofficial RSS';
const URI = 'https://wp.comicskingdom.com/wp-json/wp/v2/ck_comic'; const URI = 'https://comicskingdom.com/';
const CACHE_TIMEOUT = 21600; // 6h const CACHE_TIMEOUT = 21600; // 6h
const DESCRIPTION = 'Comics Kingdom Unofficial RSS'; const DESCRIPTION = 'Comics Kingdom Unofficial RSS';
const PARAMETERS = [ [ const PARAMETERS = [ [
'comicname' => [ 'comicname' => [
'name' => 'Name of comic', 'name' => 'comicname',
'type' => 'text', 'type' => 'text',
'exampleValue' => 'mutts', 'exampleValue' => 'mutts',
'title' => 'The name of the comic in the URL after https://comicskingdom.com/', 'title' => 'The name of the comic in the URL after https://comicskingdom.com/',
'required' => true 'required' => true
],
'limit' => [
'name' => 'Limit',
'type' => 'number',
'title' => 'The number of recent comics to get',
'defaultValue' => 10
] ]
]]; ]];
protected $comicName;
public function collectData() public function collectData()
{ {
$json = getContents($this->getURI()); $html = getSimpleHTMLDOM($this->getURI(), [], [], true, false);
$data = json_decode($json, false);
if (isset($data[0]->_embedded->{'wp:term'}[0][0])) { // Get author from first page
$this->comicName = $data[0]->_embedded->{'wp:term'}[0][0]->name; $author = $html->find('div.author p', 0);
} ;
foreach ($data as $comicitem) { // Get current date/link
$link = $html->find('meta[property=og:url]', -1)->content;
for ($i = 0; $i < 3; $i++) {
$item = []; $item = [];
$item['id'] = $comicitem->id; $page = getSimpleHTMLDOM($link);
$item['uri'] = $comicitem->yoast_head_json->og_url;
$item['author'] = str_ireplace('By ', '', $comicitem->ck_comic_byline); $imagelink = $page->find('meta[property=og:image]', 0)->content;
$item['title'] = $comicitem->yoast_head_json->title;
$item['timestamp'] = $comicitem->date; $date = explode('/', $link);
$item['content'] = '<img src="' . $comicitem->yoast_head_json->og_image[0]->url . '" />';
$item['id'] = $imagelink;
$item['uri'] = $link;
$item['author'] = $author;
$item['title'] = 'Comics Kingdom ' . $this->getInput('comicname');
$item['timestamp'] = DateTime::createFromFormat('Y-m-d', $date[count($date) - 1])->getTimestamp();
$item['content'] = '<img src="' . $imagelink . '" />';
$this->items[] = $item; $this->items[] = $item;
$link = $page->find('div.comic-viewer-inline a', 0)->href;
if (empty($link)) {
break; // allow bridge to continue if there's less than 3 comics
}
} }
} }
public function getURI() public function getURI()
{ {
if (!is_null($this->getInput('comicname'))) { if (!is_null($this->getInput('comicname'))) {
$params = [ return self::URI . urlencode($this->getInput('comicname'));
'ck_feature' => $this->getInput('comicname'),
'per_page' => $this->getInput('limit'),
'date_inclusive' => 'true',
'order' => 'desc',
'page' => '1',
'_embed' => 'true'
];
return self::URI . '?' . http_build_query($params);
} }
return parent::getURI(); return parent::getURI();
@ -68,8 +62,8 @@ class ComicsKingdomBridge extends BridgeAbstract
public function getName() public function getName()
{ {
if ($this->comicName) { if (!is_null($this->getInput('comicname'))) {
return $this->comicName . ' - Comics Kingdom'; return $this->getInput('comicname') . ' - Comics Kingdom';
} }
return parent::getName(); return parent::getName();

View File

@ -12,8 +12,9 @@ class CommonDreamsBridge extends FeedExpander
$this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10); $this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10);
} }
protected function parseItem(array $item) protected function parseItem($newsItem)
{ {
$item = parent::parseItem($newsItem);
$item['content'] = $this->extractContent($item['uri']); $item['content'] = $this->extractContent($item['uri']);
return $item; return $item;
} }

View File

@ -13,9 +13,11 @@ class CourrierInternationalBridge extends FeedExpander
$this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20); $this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20);
} }
protected function parseItem(array $item) protected function parseItem($feedItem)
{ {
$articlePage = getSimpleHTMLDOMCached($item['uri']); $item = parent::parseItem($feedItem);
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
$content = $articlePage->find('.article-text, depeche-text', 0); $content = $articlePage->find('.article-text, depeche-text', 0);
if (!$content) { if (!$content) {
return $item; return $item;

View File

@ -109,7 +109,7 @@ class CrewbayBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$url = $this->getURI(); $url = $this->getURI();
$html = getSimpleHTMLDOM($url); $html = getSimpleHTMLDOM($url) or returnClientError('No results for this query.');
$annonces = $html->find('#SearchResults div.result'); $annonces = $html->find('#SearchResults div.result');
$limit = 0; $limit = 0;

View File

@ -1,309 +0,0 @@
<?php
class CssSelectorBridge extends BridgeAbstract
{
const MAINTAINER = 'ORelio';
const NAME = 'CSS Selector Bridge';
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
const DESCRIPTION = 'Convert any site to RSS feed using CSS selectors (Advanced Users)';
/**
 * Form fields offered to the user; collectData() reads each key back through getInput().
 * Heredoc bodies stay at column 0 so the tooltip text carries no leading whitespace.
 */
const PARAMETERS = [
    [
        'home_page' => [
            'name' => 'Site URL: Home page with latest articles',
            'exampleValue' => 'https://example.com/blog/',
            'required' => true
        ],
        'url_selector' => [
            'name' => 'Selector for article links or their parent elements',
            'title' => <<<EOT
This bridge works using CSS selectors, e.g. "a.article" will match all <a class="article"
href="URL">TITLE</a> on home page, each one being treated as a feed item. &#10;&#13;
Instead of just a link you can select one of its parent elements. Everything inside that
element becomes feed item content, e.g. image and summary present on home page.
When doing so, the first link inside the selected element becomes feed item URL/Title.
EOT,
            'exampleValue' => 'a.article',
            'required' => true
        ],
        'url_pattern' => [
            'name' => '[Optional] Pattern for site URLs to keep in feed',
            'title' => 'Optionally filter items by applying a regular expression on their URL',
            'exampleValue' => '/blog/article/.*',
        ],
        'content_selector' => [
            'name' => '[Optional] Selector to expand each article content',
            'title' => <<<EOT
When specified, the bridge will fetch each article from its URL
and extract content using the provided selector (Slower!)
EOT,
            'exampleValue' => 'article.content',
        ],
        'content_cleanup' => [
            'name' => '[Optional] Content cleanup: List of items to remove',
            'title' => 'Selector for unnecessary elements to remove inside article contents.',
            'exampleValue' => 'div.ads, div.comments',
        ],
        'title_cleanup' => [
            'name' => '[Optional] Text to remove from expanded article title',
            'title' => <<<EOT
When fetching each article page, feed item title comes from page title.
Specify here some text from page title that need to be removed, e.g. " | BlogName".
EOT,
            'exampleValue' => ' | BlogName',
        ],
        'discard_thumbnail' => [
            'name' => '[Optional] Discard thumbnail set by site author',
            'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
            'type' => 'checkbox',
        ],
        'thumbnail_as_header' => [
            'name' => '[Optional] Insert thumbnail as article header',
            'title' => 'Insert article main image on top of article contents.',
            'type' => 'checkbox',
        ],
        'limit' => self::LIMIT
    ]
];
protected $feedName = '';
protected $homepageUrl = '';
/**
 * Return the home page URL recorded by collectData(), or the bridge's
 * default URI while none has been recorded.
 *
 * @return string
 */
public function getURI()
{
    return empty($this->homepageUrl) ? parent::getURI() : $this->homepageUrl;
}
/**
 * Return the feed name set by collectData(), or the bridge's default name
 * while no feed name is available.
 *
 * @return string
 */
public function getName()
{
    return empty($this->feedName) ? parent::getName() : $this->feedName;
}
/**
 * Fetch the home page, locate the article entries and fill $this->items.
 *
 * Without a content selector the entries found on the home page are used as-is.
 * With one, every entry is expanded from its own page through
 * expandEntryWithSelector(), then the thumbnail options are applied.
 */
public function collectData()
{
    $this->homepageUrl = $this->getInput('home_page');
    $url_selector = $this->getInput('url_selector');
    $url_pattern = $this->getInput('url_pattern');
    $content_selector = $this->getInput('content_selector');
    $content_cleanup = $this->getInput('content_cleanup');
    $title_cleanup = $this->getInput('title_cleanup');
    $discard_thumbnail = $this->getInput('discard_thumbnail');
    $thumbnail_as_header = $this->getInput('thumbnail_as_header');
    $limit = $this->getInput('limit') ?? 10;

    $html = defaultLinkTo(getSimpleHTMLDOM($this->homepageUrl), $this->homepageUrl);
    $this->feedName = $this->titleCleanup($this->getPageTitle($html), $title_cleanup);
    $items = $this->htmlFindEntries($html, $url_selector, $url_pattern, $limit, $content_cleanup);

    if (empty($content_selector)) {
        $this->items = $items;
        return;
    }

    foreach ($items as $item) {
        $item = $this->expandEntryWithSelector(
            $item['uri'],
            $content_selector,
            $content_cleanup,
            $title_cleanup,
            $item['title']
        );
        if ($discard_thumbnail && isset($item['enclosures'])) {
            unset($item['enclosures']);
        }
        if ($thumbnail_as_header && isset($item['enclosures'][0])) {
            // The expanded item is not guaranteed to carry a 'content' key (e.g. the
            // content selector matched nothing), so default it instead of reading an
            // undefined index.
            $item['content'] = '<p><img src="' . $item['enclosures'][0] . '" /></p>' . ($item['content'] ?? '');
        }
        $this->items[] = $item;
    }
}
/**
 * Keep only the URLs matching a pattern, then cap the list length.
 *
 * The pattern is wrapped in "/" delimiters after escaping every "/" it contains.
 * An empty pattern disables filtering; a limit of 0 (or less) disables the cap.
 *
 * @param array $links List of URLs
 * @param string $url_pattern Regular expression body to look for in URLs
 * @param int $limit Optional maximum amount of URLs to return
 * @return array Array of URLs
 */
protected function filterUrlList($links, $url_pattern, $limit = 0)
{
    if (!empty($url_pattern)) {
        $regex = '/' . str_replace('/', '\/', $url_pattern) . '/';
        $links = array_filter($links, fn ($candidate) => preg_match($regex, $candidate) === 1);
    }

    $exceedsLimit = $limit > 0 && count($links) > $limit;

    return $exceedsLimit ? array_slice($links, 0, $limit) : $links;
}
/**
 * Retrieve the decoded <title> text from a webpage URL or DOM.
 *
 * @param string|object $page URL (fetched through the cache) or DOM to read the title from
 * @return string Webpage title, or an empty string when the page has no <title> element
 */
protected function getPageTitle($page)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOMCached($page);
    }

    // find() yields null when nothing matches; avoid reading a property on null.
    $titleElement = $page->find('title', 0);
    if (is_null($titleElement)) {
        return '';
    }

    return html_entity_decode($titleElement->plaintext);
}
/**
 * Strip a constant fragment (such as the blog name) from an article title.
 *
 * The title is returned untouched when either argument is empty; otherwise
 * every occurrence of the fragment is removed and the result is trimmed.
 *
 * @param string $title Title to clean, e.g. "Article Name | BlogName"
 * @param string $title_cleanup Text to remove from the title, e.g. " | BlogName"
 * @return string Cleaned title
 */
protected function titleCleanup($title, $title_cleanup)
{
    if (empty($title) || empty($title_cleanup)) {
        return $title;
    }

    return trim(str_replace($title_cleanup, '', $title));
}
/**
 * Remove all elements matching the cleanup selector from HTML content.
 *
 * String input is parsed, cleaned and serialised back to a string; DOM input
 * is cleaned in place and returned as a DOM object.
 *
 * @param string|object $content HTML content as HTML object or string
 * @param string|null $cleanup_selector Selector for elements to blank out; empty to keep everything
 * @return string|object Cleaned content (same type as input)
 */
protected function cleanArticleContent($content, $cleanup_selector)
{
    $string_convert = is_string($content);
    if ($string_convert) {
        $parsed = str_get_html($content);
        if (!$parsed) {
            // Parsing failed (the parser rejects e.g. empty input): there is nothing
            // to clean, so hand the string back rather than calling find() on a
            // non-object.
            return $content;
        }
        $content = $parsed;
    }

    if (!empty($cleanup_selector)) {
        foreach ($content->find($cleanup_selector) as $item_to_clean) {
            $item_to_clean->outertext = '';
        }
    }

    return $string_convert ? $content->outertext : $content;
}
/**
 * Collect link, title and (when available) inline content for the first N entries of a page.
 *
 * Each element matched by the URL selector is either a link itself or a container whose
 * first link is used. Entries are keyed by URL, so a URL seen twice keeps the last match.
 * A client error is reported when the selector matches nothing, when no matched element
 * yields a link, or when the URL pattern filters everything out.
 *
 * @param string|object $page URL or DOM to retrieve feed items from
 * @param string $url_selector DOM selector for matching links or their parent element
 * @param string $url_pattern Optional filter to keep only links matching the pattern
 * @param int $limit Optional maximum amount of URLs to return
 * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments"
 * @return array of items {'uri': entry_url, 'title': entry_title, ['content': when present in DOM] }
 */
protected function htmlFindEntries($page, $url_selector, $url_pattern = '', $limit = 0, $content_cleanup = null)
{
    $dom = is_string($page) ? getSimpleHTMLDOM($page) : $page;

    $matches = $dom->find($url_selector);
    if (empty($matches)) {
        returnClientError('No results for URL selector');
    }

    $entriesByUrl = [];
    foreach ($matches as $element) {
        $entry = [];

        // Only keep inline content when the element holds more than its plain text.
        if ($element->innertext != $element->plaintext) {
            $entry['content'] = $element->innertext;
        }

        // A non-link match is a container: use the first link found inside it.
        $anchor = $element->tag != 'a' ? $element->find('a', 0) : $element;
        if (is_null($anchor)) {
            continue;
        }

        $entry['uri'] = $anchor->href;
        $entry['title'] = $anchor->plaintext;

        if (isset($entry['content'])) {
            $inline = convertLazyLoading($entry['content']);
            $inline = defaultLinkTo($inline, $entry['uri']);
            $entry['content'] = $this->cleanArticleContent($inline, $content_cleanup);
        }

        $entriesByUrl[$anchor->href] = $entry;
    }

    if (empty($entriesByUrl)) {
        returnClientError('The provided URL selector matches some elements, but they do not contain links.');
    }

    $selectedUrls = $this->filterUrlList(array_keys($entriesByUrl), $url_pattern, $limit);
    if (empty($selectedUrls)) {
        returnClientError('No results for URL pattern');
    }

    // Rebuild a plain list in the order of the filtered URLs.
    $result = [];
    foreach ($selectedUrls as $selectedUrl) {
        $result[] = $entriesByUrl[$selectedUrl];
    }

    return $result;
}
/**
 * Retrieve article content from its URL using the content selector and return a feed item.
 *
 * The item starts from the page's SEO metadata; URI and title are filled in when the
 * metadata lacks them. When the selector matches, the first match (lazy-loading fixed,
 * links made absolute, cleanup applied) becomes the content. When it does not match but
 * the item already has content, a notice is appended to that content.
 *
 * @param string $entry_url URL to retrieve article from
 * @param string $content_selector HTML selector for extracting content, e.g. "article.content"
 * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments"
 * @param string $title_cleanup Optional string to remove from article title, e.g. " | BlogName"
 * @param string $title_default Optional title to use when could not extract title reliably
 * @return array Entry data: uri, title, content
 */
protected function expandEntryWithSelector($entry_url, $content_selector, $content_cleanup = null, $title_cleanup = null, $title_default = null)
{
    if (empty($content_selector)) {
        returnClientError('Please specify a content selector');
    }

    $entry_html = getSimpleHTMLDOMCached($entry_url);
    $item = html_find_seo_metadata($entry_html);

    if (empty($item['uri'])) {
        $item['uri'] = $entry_url;
    }

    if (empty($item['title'])) {
        // getPageTitle() takes the page only; cleanup is applied once, below.
        $article_title = $this->getPageTitle($entry_html);
        // A page title equal to the feed name says nothing about the article itself.
        if (!empty($title_default) && (empty($article_title) || $article_title === $this->feedName)) {
            $article_title = $title_default;
        }
        $item['title'] = $article_title;
    }

    $item['title'] = $this->titleCleanup($item['title'], $title_cleanup);

    $article_content = $entry_html->find($content_selector);

    if (!empty($article_content)) {
        $article_content = $article_content[0];
        $article_content = convertLazyLoading($article_content);
        $article_content = defaultLinkTo($article_content, $entry_url);
        $article_content = $this->cleanArticleContent($article_content, $content_cleanup);
        $item['content'] = $article_content;
    } elseif (!empty($item['content'])) {
        $item['content'] .= '<br /><p><em>Could not extract full content, selector may need to be updated.</em></p>';
    }

    return $item;
}
}

View File

@ -1,462 +0,0 @@
<?php
class CssSelectorComplexBridge extends BridgeAbstract
{
const MAINTAINER = 'Lars Stegman';
const NAME = 'CSS Selector Complex Bridge';
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
const DESCRIPTION = <<<EOT
Convert any site to RSS feed using CSS selectors (Advanced Users). The bridge first selects
the element describing the article entries. It then extracts the links to the articles from
these elements. It then, depending on the setting "Load article from page", either parses
the selected elements, or downloads the page for each article and parses those. Parsing the
elements or page is done using the provided selectors.
EOT;
/**
 * Form fields offered to the user; collectData() reads each key back through getInput().
 * Heredoc bodies stay at column 0 so the tooltip text carries no leading whitespace.
 */
const PARAMETERS = [
    [
        'home_page' => [
            'name' => 'Site URL: Page with latest articles',
            'exampleValue' => 'https://example.com/blog/',
            'required' => true
        ],
        'cookie' => [
            'name' => '[Optional] Cookie',
            'title' => <<<EOT
Use when the website does not send the page contents, unless a static cookie is included.
EOT,
            'exampleValue' => 'sessionId=deadb33f'
        ],
        'title_cleanup' => [
            'name' => '[Optional] Text to remove from feed title',
            'title' => <<<EOT
Text to remove from the feed title, which is read from the article list page.
EOT,
            'exampleValue' => ' | BlogName',
        ],
        'entry_element_selector' => [
            'name' => 'Selector for article entry elements',
            'title' => <<<EOT
This bridge works using CSS selectors, e.g. "div.article" will match all
<div class="article">...</div> on home page, each one being treated as a feed item.
Use the URL selector option to select the `a` element with the
`href` to the article link. If this option is not configured, the first encountered
`a` element is used.
EOT,
            'exampleValue' => 'div.article',
            'required' => true
        ],
        'url_selector' => [
            'name' => '[Optional] Selector for link elements',
            'title' => <<<EOT
The selector to find `a` elements in the entry element. If empty,
the first encountered `a` element is used. The `href` property
is used to create entries in the feed.
EOT,
            'exampleValue' => 'a.article',
            'defaultValue' => 'a'
        ],
        'url_pattern' => [
            'name' => '[Optional] Pattern for site URLs to keep in feed',
            'title' => 'Optionally filter items by applying a regular expression on their URL',
            'exampleValue' => '/blog/article/.*',
        ],
        'limit' => self::LIMIT,
        'use_article_pages' => [
            'name' => 'Load article from page',
            'title' => <<<EOT
If true, the article page is loaded and parsed to get the article contents using
the css selectors. (Slower!)
Otherwise, the element selected by the article entry selector is used.
EOT,
            'type' => 'checkbox'
        ],
        'article_page_content_selector' => [
            'name' => '[Optional] Selector to select article element',
            'title' => 'Extract the article from its page using the provided selector',
            'exampleValue' => 'article.content',
        ],
        'content_cleanup' => [
            'name' => '[Optional] Content cleanup: selector for items to remove',
            'title' => 'Selector for unnecessary elements to remove inside article contents.',
            'exampleValue' => 'div.ads, div.comments',
        ],
        'title_selector' => [
            'name' => '[Optional] Selector for the article title',
            'title' => 'Selector to select the article title',
            'defaultValue' => 'h1'
        ],
        'category_selector' => [
            'name' => '[Optional] Categories',
            'title' => <<<EOT
Selector to extract the categories the article has
EOT,
            'exampleValue' => 'span.category, #main-category'
        ],
        'author_selector' => [
            'name' => '[Optional] Author',
            'title' => <<<EOT
Selector to extract the author of the article. If multiple elements are selected
the first one is used.
EOT,
            'exampleValue' => 'span#author'
        ],
        'time_selector' => [
            'name' => '[Optional] Time selector',
            'title' => <<<EOT
Selector to extract the timestamp of the article. If the element
is an html5 `time` element, the value for the `datetime` attribute is used.
EOT,
        ],
        'time_format' => [
            'name' => '[Optional] Format string for parsing time',
            'title' => <<<EOT
The format to use to parse the timestamp. See
https://www.php.net/manual/en/datetimeimmutable.createfromformat.php
for the format specification.
EOT
        ],
        'remove_styling' => [
            'name' => '[Optional] Remove styling',
            'title' => 'Remove class and style attributes from the page elements',
            'type' => 'checkbox'
        ]
    ]
];
private $feedName = '';
/**
 * Return the configured home page URL, or the bridge's default URI when
 * the 'home_page' input is empty.
 *
 * @return string
 */
public function getURI()
{
    $homePage = $this->getInput('home_page');

    return empty($homePage) ? parent::getURI() : $homePage;
}
/**
 * Return the feed name set by collectData(), or the bridge's default name
 * while no feed name is available.
 *
 * @return string
 */
public function getName()
{
    return empty($this->feedName) ? parent::getName() : $this->feedName;
}
/**
 * Build the extra HTTP request headers for page fetches.
 *
 * @return array A single "Cookie: ..." header when the 'cookie' input is set, otherwise an empty list
 */
protected function getHeaders()
{
    $cookie = $this->getInput('cookie');

    return empty($cookie) ? [] : ['Cookie: ' . $cookie];
}
/**
 * Fetch the article list page, locate the entry elements and fill $this->items.
 *
 * When "Load article from page" is enabled, each entry element is replaced by the
 * article element fetched from the entry's own URL before being parsed.
 */
public function collectData()
{
    $homePage = $this->getInput('home_page');
    $requestHeaders = $this->getHeaders();
    $entrySelector = $this->getInput('entry_element_selector');
    $linkSelector = $this->getInput('url_selector');
    $urlPattern = $this->getInput('url_pattern');
    $maxItems = $this->getInput('limit') ?? 10;
    $loadArticlePages = $this->getInput('use_article_pages');
    $articleSelector = $this->getInput('article_page_content_selector');
    $cleanupSelector = $this->getInput('content_cleanup');
    $titleSelector = $this->getInput('title_selector');
    $titleCleanup = $this->getInput('title_cleanup');
    $timeSelector = $this->getInput('time_selector');
    $timeFormat = $this->getInput('time_format');
    $categorySelector = $this->getInput('category_selector');
    $authorSelector = $this->getInput('author_selector');
    $removeStyling = $this->getInput('remove_styling');

    $dom = defaultLinkTo(getSimpleHTMLDOM($homePage, $requestHeaders), $homePage);
    $this->feedName = $this->getTitle($dom, $titleCleanup);

    $elementsByUri = $this->htmlFindEntryElements($dom, $entrySelector, $linkSelector, $urlPattern, $maxItems);
    if (empty($elementsByUri)) {
        return;
    }

    // Fetch the elements from the article pages.
    if ($loadArticlePages) {
        if (empty($articleSelector)) {
            returnClientError('`Article selector` is required when `Load article page` is enabled');
        }
        foreach ($elementsByUri as $uri => $listElement) {
            $elementsByUri[$uri] = $this->fetchArticleElementFromPage($uri, $articleSelector);
        }
    }

    foreach ($elementsByUri as $uri => $element) {
        $entry = $this->parseEntryElement(
            $element,
            $titleSelector,
            $authorSelector,
            $categorySelector,
            $timeSelector,
            $timeFormat,
            $cleanupSelector,
            $this->feedName,
            $removeStyling
        );
        $entry['uri'] = $uri;
        $this->items[] = $entry;
    }
}
/**
 * Keep only the URLs matching a pattern, then cap the list length.
 *
 * The pattern is wrapped in "/" delimiters after escaping every "/" it contains.
 * An empty pattern disables filtering; a limit of 0 (or less) disables the cap.
 *
 * @param array $links List of URLs
 * @param string $url_pattern Regular expression body to look for in URLs
 * @param int $limit Optional maximum amount of URLs to return
 * @return array Array of URLs
 */
protected function filterUrlList($links, $url_pattern, $limit = 0)
{
    if (!empty($url_pattern)) {
        $regex = '/' . str_replace('/', '\/', $url_pattern) . '/';
        $links = array_filter($links, fn ($candidate) => preg_match($regex, $candidate) === 1);
    }

    if ($limit <= 0 || count($links) <= $limit) {
        return $links;
    }

    return array_slice($links, 0, $limit);
}
/**
 * Retrieve the decoded <title> text from a webpage URL or DOM.
 *
 * @param string|object $page URL (fetched through the cache with the configured headers) or DOM
 * @param string|null $title_cleanup optional string to remove from webpage title, e.g. " | BlogName"
 * @return string Trimmed webpage title, or an empty string when the page has no <title> element
 */
protected function getTitle($page, $title_cleanup)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOMCached($page, 86400, $this->getHeaders());
    }

    // find() yields null when nothing matches; avoid reading a property on null.
    $titleElement = $page->find('title', 0);
    if (is_null($titleElement)) {
        return '';
    }

    $title = html_entity_decode($titleElement->plaintext);
    if (empty($title)) {
        return $title;
    }

    // The cleanup text is optional and arrives as null when left blank; passing
    // null to str_replace() is deprecated, so only replace when there is text.
    if ($title_cleanup !== null && $title_cleanup !== '') {
        $title = str_replace($title_cleanup, '', $title);
    }

    return trim($title);
}
/**
 * Remove unwanted elements and, optionally, styling attributes from HTML content.
 *
 * String input is parsed, cleaned and serialised back to a string; DOM input
 * is cleaned in place and returned as a DOM object.
 *
 * @param string|object $content HTML content as HTML object or string
 * @param string|null $cleanup_selector Selector for elements to blank out; empty to keep everything
 * @param bool $remove_styling When truthy, strip every `class` and `style` attribute
 * @return string|object Cleaned content (same type as input)
 */
protected function cleanArticleContent($content, $cleanup_selector, $remove_styling)
{
    $string_convert = is_string($content);
    if ($string_convert) {
        $parsed = str_get_html($content);
        if (!$parsed) {
            // Parsing failed (the parser rejects e.g. empty input): there is nothing
            // to clean, so hand the string back rather than calling find() on a
            // non-object.
            return $content;
        }
        $content = $parsed;
    }

    if (!empty($cleanup_selector)) {
        foreach ($content->find($cleanup_selector) as $item_to_clean) {
            $item_to_clean->outertext = '';
        }
    }

    if ($remove_styling) {
        foreach (['class', 'style'] as $attribute_to_remove) {
            foreach ($content->find('[' . $attribute_to_remove . ']') as $item_to_clean) {
                $item_to_clean->removeAttribute($attribute_to_remove);
            }
        }
    }

    return $string_convert ? $content->outertext : $content;
}
/**
 * Retrieve the first N link+element pairs from a webpage URL or DOM satisfying the criteria.
 *
 * For every entry element the link is the first match of the URL selector inside it, or
 * the entry itself when it is an `a` element and nothing inside matched. Entries without
 * a link are skipped. Results are keyed by URL, so a URL seen twice keeps the last entry.
 * A client error is reported when no entry matches, when no entry yields a link, or when
 * the URL pattern filters everything out.
 *
 * @param string|object $page URL or DOM to retrieve feed items from
 * @param string $entry_selector DOM selector for matching HTML elements that contain article
 *                               entries
 * @param string $url_selector DOM selector for matching links; empty means "a"
 * @param string $url_pattern Optional filter to keep only links matching the pattern
 * @param int $limit Optional maximum amount of URLs to return
 * @return array of items { <uri> => <html-element> }
 */
protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOM($page, $this->getHeaders());
    }

    // The form promises that an empty link selector falls back to the first `a` element.
    if (empty($url_selector)) {
        $url_selector = 'a';
    }

    $entryElements = $page->find($entry_selector);
    if (empty($entryElements)) {
        returnClientError('No entry elements for entry selector');
    }

    // Extract URIs with the associated entry element
    $links_with_elements = [];
    foreach ($entryElements as $entry) {
        $url_element = $entry->find($url_selector, 0);
        if (is_null($url_element)) {
            // No `a` element found in this entry
            if ($entry->tag == 'a') {
                $url_element = $entry;
            } else {
                continue;
            }
        }

        $links_with_elements[$url_element->href] = $entry;
    }

    if (empty($links_with_elements)) {
        // Single-line message: the literal previously carried an embedded line break.
        returnClientError('The provided URL selector matches some elements, but they do not contain links.');
    }

    // Filter using the URL pattern
    $filtered_urls = $this->filterUrlList(array_keys($links_with_elements), $url_pattern, $limit);

    if (empty($filtered_urls)) {
        returnClientError('No results for URL pattern');
    }

    $items = [];
    foreach ($filtered_urls as $link) {
        $items[$link] = $links_with_elements[$link];
    }

    return $items;
}
/**
 * Load an article page and return the first element matching the content selector.
 *
 * The page is loaded through getSimpleHTMLDOMCached() and the matched element is passed
 * through defaultLinkTo() together with the article URL before being returned.
 *
 * @param string $entry_url URL to retrieve article from
 * @param string $content_selector HTML selector for extracting content, e.g. "article.content"
 * @return object Article DOM element
 */
protected function fetchArticleElementFromPage($entry_url, $content_selector)
{
    $page = getSimpleHTMLDOMCached($entry_url, 86400, $this->getHeaders());

    $element = $page->find($content_selector, 0);
    if (is_null($element)) {
        returnClientError('Could not get article content at URL: ' . $entry_url);
    }

    return defaultLinkTo($element, $entry_url);
}
/**
 * Convert a time string into a Unix timestamp using an explicit date format.
 *
 * @param string $timeStr Time string to parse
 * @param string $format Format understood by date_parse_from_format()
 * @return int Unix timestamp built with mktime() from the parsed date fields
 */
protected function parseTimeStrAsTimestamp($timeStr, $format)
{
    $date = date_parse_from_format($format, $timeStr);
    if ($date['error_count'] != 0) {
        returnClientError('Error while parsing time string');
    }

    $timestamp = mktime(
        $date['hour'],
        $date['minute'],
        $date['second'],
        $date['month'],
        $date['day'],
        $date['year']
    );

    // Strict comparison: a timestamp of 0 is a valid result and must not be
    // mistaken for mktime()'s false failure value.
    if ($timestamp === false) {
        returnClientError('Error while creating timestamp');
    }
    return $timestamp;
}
/**
 * Build a feed item from a DOM element containing an article.
 *
 * @param object $entry_html A DOM element containing the article
 * @param string $title_selector A selector to the article title from the article.
 *                               When null, $title_default is used as the title.
 * @param string $author_selector A selector to find the article author
 * @param string $category_selector A selector matching the article categories
 * @param string $time_selector A selector to get the article publication time. The element's
 *                              "datetime" attribute is preferred, its inner text is the fallback.
 * @param string $time_format The format to parse the time_selector.
 * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments"
 * @param string $title_default Title to use when no title selector is given
 * @param bool $remove_styling Whether to remove class and style attributes from the HTML
 * @return array Entry data: title, content, categories, timestamp, author
 */
protected function parseEntryElement(
    $entry_html,
    $title_selector = null,
    $author_selector = null,
    $category_selector = null,
    $time_selector = null,
    $time_format = null,
    $content_cleanup = null,
    $title_default = null,
    $remove_styling = false
) {
    $article_content = convertLazyLoading($entry_html);

    $article_title = '';
    if (is_null($title_selector)) {
        $article_title = $title_default;
    } else {
        $titleElement = $entry_html->find($title_selector, 0);
        if ($titleElement) {
            $article_title = trim($titleElement->innertext);
        }
    }

    $author = null;
    if (!is_null($author_selector) && $author_selector != '') {
        // The selector may match nothing; leave the author unset in that case.
        $author_element = $entry_html->find($author_selector, 0);
        if ($author_element) {
            $author = trim($author_element->innertext);
        }
    }

    $categories = [];
    // Both checks must apply to the selector itself. The previous code wrapped the whole
    // boolean expression in is_null(), which made the condition always true.
    if (!is_null($category_selector) && $category_selector != '') {
        foreach ($entry_html->find($category_selector) as $category_element) {
            $categories[] = trim($category_element->innertext);
        }
    }

    $time = null;
    if (!is_null($time_selector) && $time_selector != '') {
        $time_element = $entry_html->find($time_selector, 0);
        if ($time_element) {
            $time = $time_element->getAttribute('datetime');
            if (empty($time)) {
                $time = $time_element->innertext;
            }
            // Keep the parsed timestamp; it used to be computed and then thrown away.
            $time = $this->parseTimeStrAsTimestamp($time, $time_format);
        }
    }

    $article_content = $this->cleanArticleContent($article_content, $content_cleanup, $remove_styling);

    $item = [];
    $item['title'] = $article_title;
    $item['content'] = $article_content;
    $item['categories'] = $categories;
    $item['timestamp'] = $time;
    $item['author'] = $author;
    return $item;
}
}

View File

@ -1,119 +0,0 @@
<?php
/**
 * Expands the items of an existing feed by loading each linked article and
 * extracting its content with CSS selectors (via the parent CssSelectorBridge).
 */
class CssSelectorFeedExpanderBridge extends CssSelectorBridge
{
    const MAINTAINER = 'ORelio';
    const NAME = 'CSS Selector Feed Expander';
    const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
    const DESCRIPTION = 'Expand any site RSS feed using CSS selectors (Advanced Users)';
    const PARAMETERS = [
        [
            'feed' => [
                'name' => 'Feed: URL of truncated RSS feed',
                'exampleValue' => 'https://example.com/feed.xml',
                'required' => true
            ],
            'content_selector' => [
                'name' => 'Selector for each article content',
                'title' => <<<EOT
This bridge works using CSS selectors, e.g. "div.article" will match <div class="article">.
Everything inside that element becomes feed item content.
EOT,
                'exampleValue' => 'article.content',
                'required' => true
            ],
            'content_cleanup' => [
                'name' => '[Optional] Content cleanup: List of items to remove',
                'title' => 'Selector for unnecessary elements to remove inside article contents.',
                'exampleValue' => 'div.ads, div.comments',
            ],
            'dont_expand_metadata' => [
                'name' => '[Optional] Don\'t expand metadata',
                'title' => "This bridge will attempt to fill missing fields using metadata from the webpage.\nCheck to disable.",
                'type' => 'checkbox',
            ],
            'discard_thumbnail' => [
                'name' => '[Optional] Discard thumbnail set by site author',
                'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
                'type' => 'checkbox',
            ],
            'thumbnail_as_header' => [
                'name' => '[Optional] Insert thumbnail as article header',
                'title' => 'Insert article main image on top of article contents.',
                'type' => 'checkbox',
            ],
            'limit' => self::LIMIT
        ]
    ];

    /**
     * Parse the source feed, expand every item through its article page and
     * merge the expanded data with the metadata already present in the feed.
     */
    public function collectData()
    {
        $feed_url = $this->getInput('feed');
        $content_selector = $this->getInput('content_selector');
        $content_cleanup = $this->getInput('content_cleanup');
        $keep_feed_metadata_only = $this->getInput('dont_expand_metadata');
        $discard_thumbnail = $this->getInput('discard_thumbnail');
        $thumbnail_as_header = $this->getInput('thumbnail_as_header');
        $limit = $this->getInput('limit');

        $parser = new FeedParser();
        $parsed_feed = $parser->parseFeed(getContents($feed_url));
        $entries = $parsed_feed['items'];

        // Homepage URL: taken from the feed, otherwise the root page of the feed URL
        $this->homepageUrl = isset($parsed_feed['uri'])
            ? $parsed_feed['uri']
            : urljoin($feed_url, '/');

        // Feed name: taken from the feed, otherwise the domain name of the feed URL
        $this->feedName = isset($parsed_feed['title'])
            ? $parsed_feed['title']
            : explode('/', urljoin($feed_url, '/'))[2];

        // A positive limit truncates the list of feed items
        if ($limit > 0) {
            $entries = array_slice($entries, 0, $limit);
        }

        foreach ($entries as $feed_entry) {
            $expanded = $this->expandEntryWithSelector(
                $feed_entry['uri'],
                $content_selector,
                $content_cleanup
            );

            if ($keep_feed_metadata_only) {
                // Start from the feed item and only swap in the expanded content
                $result = $feed_entry;
                $result['content'] = $expanded['content'];
            } else {
                // Start from the expanded item; non-empty feed metadata overrides it
                $result = $expanded;
                foreach ($feed_entry as $field => $value) {
                    if ($field === 'content' || empty($value)) {
                        continue;
                    }
                    $result[$field] = $value;
                }
            }

            if ($discard_thumbnail && isset($result['enclosures'])) {
                unset($result['enclosures']);
            }

            if ($thumbnail_as_header && isset($result['enclosures'][0])) {
                $header_image = '<p><img src="' . $result['enclosures'][0] . '" /></p>';
                $result['content'] = $header_image . $result['content'];
            }

            $this->items[] = $result;
        }
    }
}

View File

@ -47,10 +47,8 @@ class CubariBridge extends BridgeAbstract
*/ */
public function collectData() public function collectData()
{ {
// TODO: fix trivial SSRF $jsonSite = getContents($this->getInput('gist'));
$json = getContents($this->getInput('gist')); $jsonFile = json_decode($jsonSite, true);
$jsonFile = Json::decode($json);
$this->mangaTitle = $jsonFile['title']; $this->mangaTitle = $jsonFile['title'];
@ -68,14 +66,12 @@ class CubariBridge extends BridgeAbstract
{ {
$url = $this->getInput('gist'); $url = $this->getInput('gist');
if (preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches)) { preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches);
// raw or gist is first match. // raw or gist is first match.
$unencoded = $matches[1] . $matches[2]; $unencoded = $matches[1] . $matches[2];
return base64_encode($unencoded); return base64_encode($unencoded);
} else {
// todo: fix this
return '';
}
} }
private function getSanitizedHash($string) private function getSanitizedHash($string)

View File

@ -1,129 +0,0 @@
<?php
/**
 * Builds a feed of chapters for a series read through the Cubari proxy API.
 */
class CubariProxyBridge extends BridgeAbstract
{
    const NAME = 'Cubari Proxy';
    const MAINTAINER = 'phantop';
    const URI = 'https://cubari.moe';
    const DESCRIPTION = 'Returns chapters from Cubari.';
    const PARAMETERS = [[
        'service' => [
            'name' => 'Content service',
            'type' => 'list',
            'defaultValue' => 'mangadex',
            'values' => [
                'MangAventure' => 'mangadventure',
                'MangaDex' => 'mangadex',
                'MangaKatana' => 'mangakatana',
                'MangaSee' => 'mangasee',
            ]
        ],
        'series' => [
            'name' => 'Series ID/Name',
            'exampleValue' => '8c1d7d0c-e0b7-4170-941d-29f652c3c19d', # KnH
            'required' => true,
        ],
        'fetch' => [
            'name' => 'Fetch chapter page images',
            'type' => 'list',
            'title' => 'Places chapter images in feed contents. Entries will consume more bandwidth.',
            'defaultValue' => 'c',
            'values' => [
                'None' => 'n',
                'Content' => 'c',
                'Enclosure' => 'e'
            ]
        ],
        'limit' => self::LIMIT
    ]];

    // Series title read from the API response; appended to the feed name by getName()
    private $title;

    /**
     * Fetch the series description from the API and emit one item per chapter,
     * iterating chapter keys in reverse order, up to the configured limit.
     */
    public function collectData()
    {
        $limit = $this->getInput('limit') ?? 10;

        $url = parent::getURI() . '/read/api/' . $this->getInput('service') . '/series/' . $this->getInput('series');
        $json = Json::decode(getContents($url));

        $this->title = $json['title'];

        $chapters = $json['chapters'];
        krsort($chapters);

        $count = 0;
        foreach ($chapters as $number => $element) {
            // Enforce the limit before doing any work: checking it only after the item
            // was built caused one extra page request for a chapter that was then dropped.
            if ($count++ == $limit) {
                break;
            }

            $item = [];
            $item['uri'] = $this->getURI() . '/' . $number;

            if ($element['title']) {
                $item['title'] = $number . ' - ' . $element['title'];
            } else {
                $item['title'] = 'Volume ' . $element['volume'] . ' Chapter ' . $number;
            }

            // Default group key; replaced by the group with the latest release date when dates exist
            $group = '1';
            if (isset($element['release_date'])) {
                $dates = $element['release_date'];
                $date = max($dates);
                $item['timestamp'] = $date;
                $group = array_keys($dates, $date)[0];
            }

            $page = $element['groups'][$group];
            $item['author'] = $json['groups'][$group];
            $api = parent::getURI() . $page;
            $item['uid'] = $page;
            $item['comments'] = $api;

            if ($this->getInput('fetch') != 'n') {
                $pages = [];
                try {
                    $jsonp = getContents($api);
                    $pages = Json::decode($jsonp);
                } catch (HttpException $e) {
                    // allow error 500, as it's effectively a 429
                    if ($e->getCode() != 500) {
                        throw $e;
                    }
                }

                if ($this->getInput('fetch') == 'e') {
                    $item['enclosures'] = $pages;
                }
                if ($this->getInput('fetch') == 'c') {
                    $item['content'] = '';
                    foreach ($pages as $img) {
                        $item['content'] .= '<img src="' . $img . '"/>';
                    }
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * @return string Bridge name, followed by the series title once it is known
     */
    public function getName()
    {
        $name = parent::getName();
        if (isset($this->title)) {
            $name .= ' - ' . $this->title;
        }
        return $name;
    }

    /**
     * @return string Reader URL of the selected series, or the base URI when no service is set
     */
    public function getURI()
    {
        $uri = parent::getURI();
        if ($this->getInput('service')) {
            $uri .= '/read/' . $this->getInput('service') . '/' . $this->getInput('series');
        }
        return $uri;
    }

    /**
     * @return string URL of the site favicon
     */
    public function getIcon()
    {
        return parent::getURI() . '/static/favicon.png';
    }
}

View File

@ -1,107 +0,0 @@
<?php
/**
 * Expands the DRK-Blutspende appointment feed with details taken from each appointment page.
 */
class DRKBlutspendeBridge extends FeedExpander
{
    const MAINTAINER = 'User123698745';
    const NAME = 'DRK-Blutspende';
    const BASE_URI = 'https://www.drk-blutspende.de';
    const URI = self::BASE_URI;
    const CACHE_TIMEOUT = 60 * 60 * 1; // 1 hour
    const DESCRIPTION = 'German Red Cross (Deutsches Rotes Kreuz) blood donation service feed with more details';
    const CONTEXT_APPOINTMENTS = 'Termine';
    const PARAMETERS = [
        self::CONTEXT_APPOINTMENTS => [
            'term' => [
                'name' => 'PLZ / Ort',
                'required' => true,
                'exampleValue' => '12555',
            ],
            'radius' => [
                'name' => 'Umkreis in km',
                'type' => 'number',
                'exampleValue' => 10,
            ],
            'limit_days' => [
                'name' => 'Limit von Tagen',
                'title' => 'Nur Termine innerhalb der nächsten x Tagen',
                'type' => 'number',
                'exampleValue' => 28,
            ],
            'limit_items' => [
                'name' => 'Limit von Terminen',
                'title' => 'Nicht mehr als x Termine',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 20,
            ]
        ]
    ];

    /**
     * Collect the appointment feed, limited to the requested number of items.
     */
    public function collectData()
    {
        $limitItems = intval($this->getInput('limit_items'));
        $this->collectExpandableDatas($this->buildAppointmentsURI(), $limitItems);
    }

    /**
     * Replace title and content of a feed item with data read from the appointment page.
     *
     * @param array $item Feed item; its 'uri' is loaded to extract date, address, info and photos
     * @return array The item with 'title' and 'content' rebuilt and 'description' set to null
     */
    protected function parseItem(array $item)
    {
        $html = getSimpleHTMLDOM($item['uri']);

        $detailsElement = $html->find('.details', 0);

        $dateElement = $detailsElement->find('.datum', 0);
        $dateLines = self::explodeLines($dateElement->plaintext);

        $addressElement = $detailsElement->find('.adresse', 0);
        $addressLines = self::explodeLines($addressElement->plaintext);

        // The info paragraph is optional
        $infoElement = $detailsElement->find('.angebote > h4 + p', 0);
        $info = $infoElement ? $infoElement->innertext : '';

        $imageElements = $detailsElement->find('.fotos img');

        $item['title'] = $dateLines[0] . ' ' . $dateLines[1] . ' ' . $addressLines[0] . ' - ' . $addressLines[1];

        $item['content'] = <<<HTML
<p><b>{$dateLines[0]} {$dateLines[1]}</b></p>
<p>{$addressElement->innertext}</p>
<p>{$info}</p>
HTML;

        foreach ($imageElements as $imageElement) {
            $src = $imageElement->getAttribute('src');
            $item['content'] .= <<<HTML
<p><img src="{$src}"></p>
HTML;
        }

        $item['description'] = null;

        return $item;
    }

    /**
     * @return string Appointment search URL (the feed URL without its ".rss" suffix) in the
     *                appointments context, the parent URI otherwise
     */
    public function getURI()
    {
        if ($this->queriedContext === self::CONTEXT_APPOINTMENTS) {
            return str_replace('.rss?', '?', $this->buildAppointmentsURI());
        }
        return parent::getURI();
    }

    /**
     * Build the URL of the appointment RSS feed from the bridge inputs.
     *
     * @return string Feed URL with date_to, radius and term query parameters
     */
    private function buildAppointmentsURI()
    {
        $term = $this->getInput('term') ?? '';
        $radius = $this->getInput('radius') ?? '';
        $limitDays = intval($this->getInput('limit_days'));
        // No day limit means an empty date_to parameter
        $dateTo = $limitDays > 0 ? date('Y-m-d', time() + (60 * 60 * 24 * $limitDays)) : '';

        // http_build_query() URL-encodes the values, so terms containing spaces,
        // umlauts or '&' no longer produce a broken query string.
        $query = http_build_query(
            [
                'date_to' => $dateTo,
                'radius' => $radius,
                'term' => $term,
            ],
            '',
            '&'
        );

        return self::BASE_URI . '/blutspendetermine/termine.rss?' . $query;
    }

    /**
     * Returns an array of strings, each of which is a substring of string formed by splitting it on boundaries formed by line breaks.
     */
    private function explodeLines(string $text): array
    {
        return array_map('trim', preg_split('/(\s*(\r\n|\n|\r)\s*)+/', $text));
    }
}

View File

@ -1,102 +0,0 @@
<?PHP
/**
 * Builds a feed from the news items listed on the Däcksnack front page,
 * loading each article page for its full content.
 */
class DacksnackBridge extends BridgeAbstract
{
    const NAME = 'Däcksnack';
    const URI = 'https://www.tidningendacksnack.se';
    const DESCRIPTION = 'Latest news by the magazine Däcksnack';
    const MAINTAINER = 'ajain-93';

    /**
     * @return string URL of the site favicon
     */
    public function getIcon()
    {
        return self::URI . '/upload/favicon/2591047722.png';
    }

    /**
     * Convert a Swedish "<day> <month name> <year>" date into a Unix timestamp at 00:00.
     *
     * @param string $dateString Date such as "12 maj 2023"
     * @return int|false Timestamp, or false when the string does not have three parts
     *                   or the month name is unknown
     */
    private function parseSwedishDates($dateString)
    {
        // Mapping of Swedish month names to two-digit month numbers
        $monthNumbers = [
            'januari' => '01',
            'februari' => '02',
            'mars' => '03',
            'april' => '04',
            'maj' => '05',
            'juni' => '06',
            'juli' => '07',
            'augusti' => '08',
            'september' => '09',
            'oktober' => '10',
            'november' => '11',
            'december' => '12'
        ];

        // Split the date string into parts; anything shorter than day/month/year is unusable
        $parts = explode(' ', $dateString);
        if (count($parts) < 3) {
            return false;
        }
        list($day, $monthName, $year) = $parts;

        // Convert month name to month number, tolerating capitalised names
        $monthName = strtolower($monthName);
        if (!isset($monthNumbers[$monthName])) {
            return false;
        }
        $month = $monthNumbers[$monthName];

        // Format to a string recognizable by DateTime
        $formattedDate = sprintf('%04d-%02d-%02d', $year, $month, $day);

        $dateValue = new DateTime($formattedDate);
        $dateValue->setTime(0, 0); // Set time to 00:00
        return $dateValue->getTimestamp();
    }

    /**
     * Collect one feed item per news link on the front page.
     */
    public function collectData()
    {
        $NEWSURL = self::URI;

        $html = getSimpleHTMLDOMCached($NEWSURL, 18000);

        foreach ($html->find('a.main-news-item') as $element) {
            $title = trim($element->find('h2', 0)->plaintext);
            $category = trim($element->find('.category-tag', 0)->plaintext);
            $url = self::URI . $element->getAttribute('href');
            $published = $this->parseSwedishDates(trim($element->find('.published', 0)->plaintext));

            $article_html = getSimpleHTMLDOMCached($url, 18000);
            $article_content = $article_html->find('#ctl00_ContentPlaceHolder1_NewsArticleVeiw_pnlArticle', 0);

            $figure = self::URI . $article_content->find('img.news-image', 0)->getAttribute('src');
            $figure_caption = $article_content->find('.image-description', 0)->plaintext;
            $author = $article_content->find('span.main-article-author', 0)->plaintext;
            $preamble = $article_content->find('h4.main-article-ingress', 0)->plaintext;

            // The article body is the last <div> without a class attribute
            $article_text = '';
            foreach ($article_content->find('div') as $div) {
                if (!$div->hasAttribute('class')) {
                    $article_text = $div;
                }
            }

            // Keep only the text between "Text:" and "Foto:" as the author name
            if (preg_match('/Text:\s*(.*?)\s*Foto:/', $author, $matches)) {
                $author = $matches[1];
            }

            $content = '<b> [' . $category . '] <i>' . $preamble . '</i></b><br/><br/>';
            $content .= '<figure>';
            // Attribute value is quoted so URLs containing spaces do not break the tag
            $content .= '<img src="' . $figure . '">';
            $content .= '<figcaption>' . $figure_caption . '</figcaption>';
            $content .= '</figure>';
            $content .= $article_text;

            $this->items[] = [
                'uri' => $url,
                'title' => $title,
                'author' => $author,
                'timestamp' => $published,
                'content' => trim($content),
            ];
        }
    }
}

View File

@ -1,56 +0,0 @@
<?PHP
/**
 * Builds a feed from the short news posts of Dagens Nyheter "Direkt".
 */
class DagensNyheterDirektBridge extends BridgeAbstract
{
    const NAME = 'Dagens Nyheter Direkt';
    const URI = 'https://www.dn.se/direkt/';
    const BASEURL = 'https://www.dn.se';
    const DESCRIPTION = 'Latest news summarised by Dagens Nyheter';
    const MAINTAINER = 'ajain-93';
    // Maximum number of items collected per run
    const LIMIT = 20;

    /**
     * @return string URL of the site favicon
     */
    public function getIcon()
    {
        return 'https://cdn.dn-static.se/images/favicon__c2dd3284b46ffdf4d520536e526065fa8.svg';
    }

    /**
     * Collect up to LIMIT items, one per <article> element of the Direkt listing.
     */
    public function collectData()
    {
        $NEWSURL = self::BASEURL . '/ajax/direkt/';

        $html = getSimpleHTMLDOM($NEWSURL);

        foreach ($html->find('article') as $element) {
            $link = $element->find('button', 0)->getAttribute('data-link');
            $datetime = $element->getAttribute('data-publication-time');
            $url = self::BASEURL . $link;
            $title = $element->find('h2', 0)->plaintext;
            $author = $element->find('div.ds-byline__titles', 0)->plaintext;
            $article_content = $element->find('div.direkt-post__content', 0);
            $article_html = '';

            // Optional leading figure: image followed by its caption in italics
            $figure = $element->find('figure', 0);
            if ($figure) {
                $article_html = $figure->find('img', 0) . '<p><i>' . $figure->find('figcaption', 0) . '</i></p>';
            }

            foreach ($article_content->find('p') as $p) {
                $article_html = $article_html . $p;
            }

            $this->items[] = [
                'uri' => $url,
                'title' => $title,
                'author' => trim($author),
                'timestamp' => $datetime,
                'content' => trim($article_html),
            ];

            // ">=" so that exactly LIMIT items are kept; ">" let one extra item through
            if (count($this->items) >= self::LIMIT) {
                break;
            }
        }
    }
}

View File

@ -1,96 +0,0 @@
<?php
/**
 * Builds a feed from a dailythanthi.com topic listing, loading every article for its content.
 */
class DailythanthiBridge extends BridgeAbstract
{
    const NAME = 'Dailythanthi';
    const URI = 'https://www.dailythanthi.com';
    const DESCRIPTION = 'Retrieve news from dailythanthi.com';
    const MAINTAINER = 'tillcash';
    const PARAMETERS = [
        [
            'topic' => [
                'name' => 'topic',
                'type' => 'list',
                'values' => [
                    'news' => [
                        'tamilnadu' => 'news/state',
                        'india' => 'news/india',
                        'world' => 'news/world',
                        'sirappu-katturaigal' => 'news/sirappukatturaigal',
                    ],
                    'cinema' => [
                        'news' => 'cinema/cinemanews',
                    ],
                    'sports' => [
                        'sports' => 'sports',
                        'cricket' => 'sports/cricket',
                        'football' => 'sports/football',
                        'tennis' => 'sports/tennis',
                        'hockey' => 'sports/hockey',
                        'other-sports' => 'sports/othersports',
                    ],
                    'devotional' => [
                        'devotional' => 'others/devotional',
                        'aalaya-varalaru' => 'aalaya-varalaru',
                    ],
                ],
            ],
        ],
    ];

    /**
     * @return string Bridge name, suffixed with the capitalised topic key when one is selected
     */
    public function getName()
    {
        $topicKey = $this->getKey('topic');
        if ($topicKey) {
            return self::NAME . ' - ' . ucfirst($topicKey);
        }
        return self::NAME . '';
    }

    /**
     * Collect one item per listing block that has both a link and a heading.
     */
    public function collectData()
    {
        $listingPage = getSimpleHTMLDOM(self::URI . '/' . $this->getInput('topic'));

        foreach ($listingPage->find('div.ListingNewsWithMEDImage') as $listing) {
            // The second link and the first <h3> of the block carry URL and title
            $link = $listing->find('a', 1);
            $heading = $listing->find('h3', 0);
            if (!$link || !$heading) {
                continue;
            }

            $articleUrl = self::URI . $link->href;

            $dateNode = $listing->find('span', 1);
            $dateString = '';
            if ($dateNode) {
                $dateString = $dateNode->{'data-datestring'};
            }

            $this->items[] = [
                'content' => $this->constructContent($articleUrl),
                'timestamp' => $dateString ? $dateString . 'UTC' : '',
                'title' => $heading->plaintext,
                'uid' => $link->href,
                'uri' => $articleUrl,
            ];
        }
    }

    /**
     * Load an article page and return its main image followed by the cleaned story markup.
     *
     * @param string $url Article URL
     * @return string Article HTML, or 'Content Not Found' when the story container is missing
     */
    private function constructContent($url)
    {
        $page = getSimpleHTMLDOMCached($url);

        $story = $page->find('div.details-content-story', 0);
        if (!$story) {
            return 'Content Not Found';
        }

        // Remove ads
        foreach ($story->find('div[id*="_ad"]') as $adBlock) {
            $adBlock->outertext = '';
        }

        // Replace the parent of every <h-img> element with a regular <img> paragraph
        foreach ($story->find('h-img') as $customImage) {
            $customImage->parent->outertext = sprintf('<p><img src="%s"></p>', $customImage->src);
        }

        $mainImage = $page->find('div.main-image-caption-container img', 0);
        $mainImageHtml = '';
        if ($mainImage) {
            $mainImageHtml = '<p>' . $mainImage->outertext . '</p>';
        }

        return $mainImageHtml . $story;
    }
}

View File

@ -10,11 +10,9 @@ class DansTonChatBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$url = self::URI . 'latest.html'; $html = getSimpleHTMLDOM(self::URI . 'latest.html');
$dom = getSimpleHTMLDOM($url);
$items = $dom->find('div.item'); foreach ($html->find('div.item') as $element) {
foreach ($items as $element) {
$item = []; $item = [];
$item['uri'] = $element->find('a', 0)->href; $item['uri'] = $element->find('a', 0)->href;
$titleContent = $element->find('h3 a', 0); $titleContent = $element->find('h3 a', 0);

View File

@ -9,7 +9,7 @@ class DarkReadingBridge extends FeedExpander
const PARAMETERS = [ [ const PARAMETERS = [ [
'feed' => [ 'feed' => [
'name' => 'Feed (NOT IN USE)', 'name' => 'Feed',
'type' => 'list', 'type' => 'list',
'values' => [ 'values' => [
'All Dark Reading Stories' => '000_AllArticles', 'All Dark Reading Stories' => '000_AllArticles',
@ -41,13 +41,24 @@ class DarkReadingBridge extends FeedExpander
public function collectData() public function collectData()
{ {
$feed_url = 'https://www.darkreading.com/rss.xml'; $feed = $this->getInput('feed');
$feed_splitted = explode('_', $feed);
$feed_id = $feed_splitted[0];
$feed_name = $feed_splitted[1];
if (empty($feed) || !ctype_digit($feed_id) || !preg_match('/[A-Za-z%20\/]/', $feed_name)) {
returnClientError('Invalid feed, please check the "feed" parameter.');
}
$feed_url = $this->getURI() . 'rss_simple.asp';
if ($feed_id != '000') {
$feed_url .= '?f_n=' . $feed_id . '&f_ln=' . $feed_name;
}
$limit = $this->getInput('limit') ?? 10; $limit = $this->getInput('limit') ?? 10;
$this->collectExpandableDatas($feed_url, $limit); $this->collectExpandableDatas($feed_url, $limit);
} }
protected function parseItem(array $item) protected function parseItem($newsItem)
{ {
$item = parent::parseItem($newsItem);
$article = getSimpleHTMLDOMCached($item['uri']); $article = getSimpleHTMLDOMCached($item['uri']);
$item['content'] = $this->extractArticleContent($article); $item['content'] = $this->extractArticleContent($article);
$item['enclosures'] = []; //remove author profile picture $item['enclosures'] = []; //remove author profile picture
@ -61,7 +72,7 @@ class DarkReadingBridge extends FeedExpander
private function extractArticleContent($article) private function extractArticleContent($article)
{ {
$content = $article->find('div.ContentModule-Wrapper', 0)->innertext; $content = $article->find('div.article-content', 0)->innertext;
foreach ( foreach (
[ [

View File

@ -43,8 +43,9 @@ class DauphineLibereBridge extends FeedExpander
$this->collectExpandableDatas($url, 10); $this->collectExpandableDatas($url, 10);
} }
protected function parseItem(array $item) protected function parseItem($newsItem)
{ {
$item = parent::parseItem($newsItem);
$item['content'] = $this->extractContent($item['uri']); $item['content'] = $this->extractContent($item['uri']);
return $item; return $item;
} }

View File

@ -0,0 +1,40 @@
<?php
/**
 * Builds a feed from the table rows of Dave's Trailer Page.
 */
class DavesTrailerPageBridge extends BridgeAbstract
{
    const MAINTAINER = 'johnnygroovy';
    const NAME = 'Daves Trailer Page Bridge';
    const URI = 'https://www.davestrailerpage.co.uk/';
    const DESCRIPTION = 'Last trailers in HD thanks to Dave.';

    /**
     * Walk every table row: centered rows update the current date, other rows become items.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM(static::URI);
        if (!$html) {
            returnClientError('No results for this query.');
        }

        $curr_date = null;
        foreach ($html->find('tr') as $tr) {
            // If it's a date row, update the current date
            if ($tr->align == 'center') {
                $curr_date = $tr->plaintext;
                continue;
            }

            // Rows lacking the title cell or the fourth link cannot form an item;
            // skip them instead of dereferencing null.
            $titleCell = $tr->find('td', 0);
            $titleElement = $titleCell ? $titleCell->find('b', 0) : null;
            $linkElement = $tr->find('a', 3);
            if (!$titleElement || !$linkElement) {
                continue;
            }

            $item = [];

            // title
            $item['title'] = $titleElement->plaintext;

            // content
            $item['content'] = $tr->find('ul', 1);

            // uri
            $item['uri'] = $linkElement->getAttribute('href');

            // date: parsed by FeedItem using strtotime
            $item['timestamp'] = $curr_date;

            $this->items[] = $item;
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -4,9 +4,8 @@ class DemoBridge extends BridgeAbstract
{ {
const MAINTAINER = 'teromene'; const MAINTAINER = 'teromene';
const NAME = 'DemoBridge'; const NAME = 'DemoBridge';
const URI = 'https://github.com/rss-bridge/rss-bridge'; const URI = 'http://github.com/rss-bridge/rss-bridge';
const DESCRIPTION = 'Bridge used for demos'; const DESCRIPTION = 'Bridge used for demos';
const CACHE_TIMEOUT = 15;
const PARAMETERS = [ const PARAMETERS = [
'testCheckbox' => [ 'testCheckbox' => [

View File

@ -1,63 +0,0 @@
<?php
/**
 * Builds a feed of the assemblies listed by the Berlin police for the coming days.
 */
class DemosBerlinBridge extends BridgeAbstract
{
    const NAME = 'Demos Berlin';
    const URI = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/';
    const CACHE_TIMEOUT = 3 * 60 * 60;
    const DESCRIPTION = 'Angezeigte Versammlungen und Aufzüge in Berlin';
    const MAINTAINER = 'knrdl';
    const PARAMETERS = [[
        'days' => [
            'name' => 'Tage',
            'type' => 'number',
            'title' => 'Einträge für die nächsten Tage zurückgeben',
            'required' => true,
            'defaultValue' => 7,
        ]
    ]];

    /**
     * @return string URL of the site favicon
     */
    public function getIcon()
    {
        return 'https://www.berlin.de/i9f/r1/images/favicon/favicon.ico';
    }

    /**
     * Read the JSON index and emit one item per entry dated no later than
     * now plus the requested number of days.
     */
    public function collectData()
    {
        $json = getContents(
            'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json'
        );
        $jsonFile = json_decode($json, true);

        // Upper bound of the time window: now + "days" input
        $maxTargetDate = date_add(
            new DateTime('now'),
            DateInterval::createFromDateString($this->getInput('days') . ' day')
        );

        foreach ($jsonFile['index'] as $entry) {
            // dd.mm.yyyy to yyyy-mm-dd
            $dateParts = explode('.', $entry['datum']);
            $entryDay = implode('-', array_reverse($dateParts));

            $entryDate = new DateTime();
            $entryDate = $entryDate->setTimestamp(strtotime($entryDay));
            if ($entryDate > $maxTargetDate) {
                continue;
            }

            $location = $entry['strasse_nr'] . ' ' . $entry['plz'];
            $locationQuery = http_build_query(['query' => $location]);

            $item = [
                'uri' => 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/detail/' . $entry['id'],
                'timestamp' => $entryDay . ' ' . $entry['von'],
                'title' => $entry['thema'],
            ];
            $item['content'] = <<<HTML
<h1>{$entry['thema']}</h1>
<p>📅 <time datetime="{$item['timestamp']}">{$entry['datum']} {$entry['von']} - {$entry['bis']}</time></p>
<a href="https://www.openstreetmap.org/search?$locationQuery">
📍 {$location}
</a>
<p>{$entry['aufzugsstrecke']}</p>
HTML;
            $uidSource = $entry['datum'] . '-' . $entry['von'] . '-' . $entry['bis'] . '-' . $entry['thema'];
            $item['uid'] = $this->getSanitizedHash($uidSource);

            $this->items[] = $item;
        }
    }

    /**
     * Hash a string after lowercasing it and dropping every non-alphanumeric character.
     *
     * @param string $string Text to hash
     * @return string SHA-1 hash of the normalised text
     */
    private function getSanitizedHash($string)
    {
        $lowercased = strtolower($string);
        $alphanumeric = preg_replace('/[^a-zA-Z0-9]/', '', $lowercased);
        return hash('sha1', $alphanumeric);
    }
}

View File

@ -78,9 +78,13 @@ class DerpibooruBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$url = self::URI . 'api/v1/json/search/images?filter_id=' . urlencode($this->getInput('f')) . '&q=' . urlencode($this->getInput('q')); $queryJson = json_decode(getContents(
self::URI
$queryJson = json_decode(getContents($url)); . 'api/v1/json/search/images?filter_id='
. urlencode($this->getInput('f'))
. '&q='
. urlencode($this->getInput('q'))
));
foreach ($queryJson->images as $post) { foreach ($queryJson->images as $post) {
$item = []; $item = [];

View File

@ -1,148 +0,0 @@
<?php
/**
 * Expands Deutsche Welle feed items with the full article markup.
 */
class DeutscheWelleBridge extends FeedExpander
{
    const MAINTAINER = 'No maintainer';
    const NAME = 'Deutsche Welle Bridge';
    const URI = 'https://www.dw.com';
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const CACHE_TIMEOUT = 3600;
    const PARAMETERS = [[
        'feed' => [
            'name' => 'feed',
            'type' => 'list',
            'values' => [
                'All Top Stories and News Updates'
                    => 'http://rss.dw.com/atom/rss-en-all',
                'Top Stories'
                    => 'http://rss.dw.com/atom/rss-en-top',
                'Germany'
                    => 'http://rss.dw.com/atom/rss-en-ger',
                'World'
                    => 'http://rss.dw.com/atom/rss-en-world',
                'Europe'
                    => 'http://rss.dw.com/atom/rss-en-eu',
                'Business'
                    => 'http://rss.dw.com/atom/rss-en-bus',
                'Science'
                    => 'http://rss.dw.com/atom/rss_en_science',
                'Environment'
                    => 'http://rss.dw.com/atom/rss_en_environment',
                'Culture & Lifestyle'
                    => 'http://rss.dw.com/atom/rss-en-cul',
                'Sports'
                    => 'http://rss.dw.de/atom/rss-en-sports',
                'Visit Germany'
                    => 'http://rss.dw.com/atom/rss-en-visitgermany',
                'Asia'
                    => 'http://rss.dw.com/atom/rss-en-asia',
                'Deutsche Welle Gesamt'
                    => 'http://rss.dw.com/atom/rss-de-all',
                'Themen des Tages'
                    => 'http://rss.dw.com/atom/rss-de-top',
                'Nachrichten'
                    => 'http://rss.dw.com/atom/rss-de-news',
                'Wissenschaft'
                    => 'http://rss.dw.com/atom/rss-de-wissenschaft',
                'Sport'
                    => 'http://rss.dw.com/atom/rss-de-sport',
                'Deutschland entdecken'
                    => 'http://rss.dw.com/atom/rss-de-deutschlandentdecken',
                'Presse'
                    => 'http://rss.dw.com/atom/presse',
                'Politik'
                    => 'http://rss.dw.com/atom/rss_de_politik',
                'Wirtschaft'
                    => 'http://rss.dw.com/atom/rss-de-eco',
                'Kultur & Leben'
                    => 'http://rss.dw.com/atom/rss-de-cul',
                'Kultur & Leben: Buch'
                    => 'http://rss.dw.com/atom/rss-de-cul-buch',
                'Kultur & Leben: Film'
                    => 'http://rss.dw.com/atom/rss-de-cul-film',
                'Kultur & Leben: Musik'
                    => 'http://rss.dw.com/atom/rss-de-cul-musik',
            ]
        ]
    ]];

    /**
     * Collect the feed selected by the "feed" input.
     */
    public function collectData()
    {
        $this->collectExpandableDatas($this->getInput('feed'));
    }

    /**
     * Replace the content of a feed item with the teaser and cleaned body of its article page.
     *
     * @param array $item Feed item; its 'uri' is stripped of the query string and loaded
     * @return array The item with updated 'uri', 'content' and, when found, 'author'
     */
    protected function parseItem(array $item)
    {
        // Drop the query string from the article URL
        $parsedUri = parse_url($item['uri']);
        unset($parsedUri['query']);
        $item['uri'] = $this->unparseUrl($parsedUri);

        $page = getSimpleHTMLDOM($item['uri']);
        $page = defaultLinkTo($page, $item['uri']);

        $article = $page->find('article', 0);

        // author
        $author = $article->find('.author-link > span', 0);
        if ($author) {
            $item['author'] = $author->text();
        }

        $teaser = $article->find('.teaser-text', 0);
        if (!is_null($teaser)) {
            $item['content'] = $teaser->outertext();
        } else {
            $item['content'] = '';
        }

        // remove unneeded elements
        foreach (
            $article->find(
                'header, .advertisement, [data-tracking-name="sharing-icons-inline"], a.external-link > svg, picture > source, .vjs-wrapper, .dw-widget, footer'
            ) as $bad
        ) {
            $bad->remove();
        }

        // reload html as remove() is buggy
        $article = str_get_html($article->outertext());

        // remove width and height values from img tags
        foreach ($article->find('img') as $img) {
            $img->width = null;
            $img->height = null;
        }

        // remove bad img src's added by defaultLinkTo() above
        // these images should have src="" and will then use
        // the srcset attribute to load the best image for the displayed size
        foreach ($article->find('figure > picture > img') as $img) {
            $img->src = '';
        }

        // replace lazy-loaded images
        foreach ($article->find('figure.placeholder-image') as $figure) {
            $img = $figure->find('img', 0);
            if (is_null($img)) {
                // Placeholder figure without an image: nothing to rewrite
                continue;
            }
            $img->src = str_replace('${formatId}', '906', $img->getAttribute('data-url'));
            $img->style = null;
        }

        $item['content'] .= $article->save();

        return $item;
    }

    /**
     * Reassemble a URL from the component array produced by parse_url().
     *
     * @param array $parsed_url Components as returned by parse_url(); missing keys are skipped
     * @return string The rebuilt URL
     */
    // https://www.php.net/manual/en/function.parse-url.php#106731
    private function unparseUrl($parsed_url)
    {
        $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
        $host = isset($parsed_url['host']) ? $parsed_url['host'] : '';
        $port = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
        $user = isset($parsed_url['user']) ? $parsed_url['user'] : '';
        // The password needs its ':' separator, otherwise "user:pass@" collapses to "userpass@"
        $pass = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
        $pass = ($user || $pass) ? "$pass@" : '';
        $path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
        $query = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
        $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';
        return "$scheme$user$pass$host$port$path$query$fragment";
    }
}

View File

@ -1,28 +0,0 @@
<?php
/**
 * Bridge for the news page of the Deutscher Aero Club (daec.de).
 *
 * The feed is described declaratively through the XPATH_EXPRESSION_*
 * constants below; only the timestamp conversion needs custom code.
 */
class DeutscherAeroClubBridge extends XPathAbstract
{
    const NAME = 'Deutscher Aero Club';
    const URI = 'https://www.daec.de/news/';
    const DESCRIPTION = 'News aus Luftsport und Dachverband';
    const MAINTAINER = 'hleskien';

    // Page the XPath expressions below are evaluated against.
    const FEED_SOURCE_URL = 'https://www.daec.de/news/';
    const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"][1]/@href';
    const XPATH_EXPRESSION_ITEM = '//div[contains(@class, "news-list-view")]/div[contains(@class, "article")]';
    const XPATH_EXPRESSION_ITEM_TITLE = './/span[@itemprop="headline"]';
    const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@itemprop="description"]/p';
    const XPATH_EXPRESSION_ITEM_URI = './/div[@class="news-header"]//a/@href';
    // No author expression is defined for this bridge.
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time/@datetime';
    const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img/@src';
    // No category expression is defined for this bridge.
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Convert the extracted timestamp text into a Unix timestamp.
     *
     * The value is expected in 'Y-m-d' form and is mapped to midnight of
     * that day. Values in any other form are handed to strtotime() instead
     * of causing a fatal error on the failed parse.
     *
     * @param string $value Date text selected by XPATH_EXPRESSION_ITEM_TIMESTAMP.
     * @return int|false Unix timestamp, or false when the value cannot be parsed at all.
     */
    protected function formatItemTimestamp($value)
    {
        $dti = DateTimeImmutable::createFromFormat('Y-m-d', $value);
        if ($dti === false) {
            // createFromFormat() rejects anything that is not exactly Y-m-d
            // (e.g. a full ISO 8601 datetime); let strtotime() have a try.
            return strtotime($value);
        }
        // createFromFormat() fills unspecified fields with the current time,
        // so pin the result to midnight explicitly.
        return $dti->setTime(0, 0, 0)->getTimestamp();
    }
}

View File

@ -163,6 +163,19 @@ class DeveloppezDotComBridge extends FeedExpander
] ]
]; ];
/**
 * Build the RSS feed URL for the sub-domain chosen in the 'domain' input,
 * or for the main site when no sub-domain is selected.
 */
private function getRssUrl()
{
    $subdomain = $this->getInput('domain');
    $base = empty($subdomain) ? self::URI : 'https://' . $subdomain . self::DOMAIN;
    return $base . self::RSS_URL;
}
/** /**
* Grabs the RSS item from Developpez.com * Grabs the RSS item from Developpez.com
*/ */
@ -176,12 +189,15 @@ class DeveloppezDotComBridge extends FeedExpander
* Parse the content of every RSS item. And will try to get the full article * Parse the content of every RSS item. And will try to get the full article
* pointed by the item URL instead of the default abstract. * pointed by the item URL instead of the default abstract.
*/ */
protected function parseItem(array $item) protected function parseItem($newsItem)
{ {
if (count($this->items) >= $this->getInput('limit')) { if (count($this->items) >= $this->getInput('limit')) {
return null; return null;
} }
// This function parse each entry in the RSS with the default parse
$item = parent::parseItem($newsItem);
// There is a bug in Developpez RSS: commas are written as '~?' in the // There is a bug in Developpez RSS: commas are written as '~?' in the
// title, so I have to fix it manually // title, so I have to fix it manually
$item['title'] = $this->fixComaInTitle($item['title']); $item['title'] = $this->fixComaInTitle($item['title']);
@ -213,19 +229,6 @@ class DeveloppezDotComBridge extends FeedExpander
return $item; return $item;
} }
/**
 * Build the RSS feed URL for the sub-domain chosen in the 'domain' input,
 * or for the main site when no sub-domain is selected.
 */
private function getRssUrl()
{
    $subdomain = $this->getInput('domain');
    $base = empty($subdomain) ? self::URI : 'https://' . $subdomain . self::DOMAIN;
    return $base . self::RSS_URL;
}
/** /**
* Replace '~?' by a proper coma ',' * Replace '~?' by a proper coma ','
*/ */
@ -331,9 +334,6 @@ class DeveloppezDotComBridge extends FeedExpander
*/ */
private function isHtmlTagNotTxt($txt) private function isHtmlTagNotTxt($txt)
{ {
if ($txt === '') {
return false;
}
$html = str_get_html($txt); $html = str_get_html($txt);
return $html && $html->root && count($html->root->children) > 0; return $html && $html->root && count($html->root->children) > 0;
} }

View File

@ -47,7 +47,7 @@ class DiarioDoAlentejoBridge extends BridgeAbstract
}, self::PT_MONTH_NAMES), }, self::PT_MONTH_NAMES),
array_map(function ($num) { array_map(function ($num) {
return sprintf('-%02d-', $num); return sprintf('-%02d-', $num);
}, range(1, count(self::PT_MONTH_NAMES))), }, range(1, sizeof(self::PT_MONTH_NAMES))),
$element->find('span.date', 0)->innertext $element->find('span.date', 0)->innertext
); );

Some files were not shown because too many files have changed in this diff Show More