mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-04 16:49:35 +00:00
Compare commits
631 Commits
2023-07-11
...
master
Author | SHA1 | Date | |
---|---|---|---|
|
d6a9da1cc8 | ||
|
85962e18d3 | ||
|
a19b63e840 | ||
|
5365b57638 | ||
|
462c005f2c | ||
|
db42f2786c | ||
|
26a4c255d3 | ||
|
3055e69c23 | ||
|
7c1e01b45a | ||
|
4d8a46d46e | ||
|
9d6aa5ee38 | ||
|
1c45eff505 | ||
|
68ff39e164 | ||
|
abb1602524 | ||
|
87112497de | ||
|
38bb5115c9 | ||
|
23cb9349fc | ||
|
05a9ac0f06 | ||
|
91fe6c1fae | ||
|
7260f28e10 | ||
|
87ab1e4513 | ||
|
dee734d360 | ||
|
744f996224 | ||
|
f270cd35e7 | ||
|
83c36a87e2 | ||
|
810e17b556 | ||
|
97f07cf216 | ||
|
62fafdc24b | ||
|
cd4cdcfd65 | ||
|
00a24e2f69 | ||
|
92b5e7093f | ||
|
b52f01505d | ||
|
e4c32bb046 | ||
|
dd4dcfa59c | ||
|
4e678c955f | ||
|
549bed64d2 | ||
|
94924d8e16 | ||
|
920b21b1fd | ||
|
935075072b | ||
|
3ae7a10223 | ||
|
bf431a6eae | ||
|
824ac5e373 | ||
|
ae8394d976 | ||
|
4da61b7922 | ||
|
8b1ba003a8 | ||
|
230edf602e | ||
|
bd7d1734c3 | ||
|
dd8bc077ed | ||
|
952a2d99a3 | ||
|
58b3cfb158 | ||
|
028acd0af1 | ||
|
2a58f82bd8 | ||
|
5214581386 | ||
|
eadea242a7 | ||
|
1a2c1f5bba | ||
|
776a1f47f3 | ||
|
39ecd63f72 | ||
|
0e2655fc8a | ||
|
e355276378 | ||
|
cb65125dbd | ||
|
1d02214e12 | ||
|
48cb7d71ed | ||
|
f9e9c8101e | ||
|
97f7df0d06 | ||
|
db3899f2e6 | ||
|
d36cd0a332 | ||
|
662e0bfa95 | ||
|
3fc38c15a3 | ||
|
be51ba17df | ||
|
c44a76ff17 | ||
|
7c6d4a932c | ||
|
45ee018a6e | ||
|
e825272987 | ||
|
97eebfb562 | ||
|
2a44a006b2 | ||
|
974f00cd6a | ||
|
4b4d622333 | ||
|
b4a63e7040 | ||
|
7d544f1fab | ||
|
152e96d3d0 | ||
|
f0db6a22d1 | ||
|
8234906127 | ||
|
d2370320e9 | ||
|
9126b0f982 | ||
|
4685bbdffd | ||
|
bf4a918e60 | ||
|
17d142c038 | ||
|
59d77d4576 | ||
|
d956471d42 | ||
|
6a81fc0f51 | ||
|
88ccc6067c | ||
|
c7f9870ba7 | ||
|
c651e11b0f | ||
|
b42a993176 | ||
|
ec6f98e3c2 | ||
|
74496e23aa | ||
|
83bc3fd762 | ||
|
628b30208a | ||
|
e3260ff529 | ||
|
086ef7f8a7 | ||
|
2ee615e588 | ||
|
a6e8760726 | ||
|
9457e075f6 | ||
|
2294dac3f1 | ||
|
6c86e2c1f7 | ||
|
dd165ea9d1 | ||
|
1cd5b072f3 | ||
|
8d6d0fa10c | ||
|
bd0fb1da99 | ||
|
29d984cbe7 | ||
|
082542dabc | ||
|
bc536f3928 | ||
|
c3dc46a307 | ||
|
6c88f2c21e | ||
|
668f3a9d7e | ||
|
b9eb3c887a | ||
|
f9a51b6768 | ||
|
51cdb66f9c | ||
|
bd88bc27d3 | ||
|
56994b3b5c | ||
|
664436c5f4 | ||
|
70cf917f09 | ||
|
776e27218a | ||
|
e5e2059ed7 | ||
|
e7d6f89887 | ||
|
0c96a47e8c | ||
|
5d83050673 | ||
|
bd823100cd | ||
|
f89c75b4b8 | ||
|
cdf21d48e5 | ||
|
3a5de759fa | ||
|
eb21e97d01 | ||
|
6aba9fdf54 | ||
|
63c16e470d | ||
|
af26d845d9 | ||
|
80c43f10d8 | ||
|
d9316cdc60 | ||
|
40041dd65f | ||
|
358bebbb89 | ||
|
293d04f296 | ||
|
3dc8b65a0b | ||
|
486191b419 | ||
|
a6bdc322b0 | ||
|
36fd72c87e | ||
|
9cabf60144 | ||
|
6a24e53d6c | ||
|
bb2f471a03 | ||
|
3e1a8b29d9 | ||
|
9f48370eb0 | ||
|
39952c2d95 | ||
|
e7ae06dcf0 | ||
|
58544cd61a | ||
|
e010fd4d52 | ||
|
d51cc8f1a7 | ||
|
6516e31c1b | ||
|
c849576c93 | ||
|
b0674d7b19 | ||
|
05e2c350b7 | ||
|
4a3919c1a3 | ||
|
06a8896000 | ||
|
d379f3e575 | ||
|
3a327503ee | ||
|
2d5d2f5017 | ||
|
320afc3f32 | ||
|
c0e37bcf35 | ||
|
e9d3a657ba | ||
|
307c22204d | ||
|
4424ea54e9 | ||
|
133dbf87c5 | ||
|
2e6e246759 | ||
|
129b8a3a5a | ||
|
4ef5ca50c6 | ||
|
adcc8e371d | ||
|
f358f1abec | ||
|
2acd415475 | ||
|
6afd13eb06 | ||
|
2a96bf19b5 | ||
|
9973f731df | ||
|
7073bb2f46 | ||
|
db85015daa | ||
|
8c4385e61d | ||
|
829d570f8e | ||
|
b25a779d98 | ||
|
ee54cf4576 | ||
|
9215b95779 | ||
|
c11bc184ca | ||
|
313be4c512 | ||
|
4faaa79101 | ||
|
6ec9193546 | ||
|
401cc187b7 | ||
|
0051e0fcdd | ||
|
d050fe9a9b | ||
|
8ae716e75c | ||
|
b505667168 | ||
|
615c533587 | ||
|
8a1f2604aa | ||
|
b8a9f34527 | ||
|
e55e9b8fac | ||
|
9982bfce1f | ||
|
1a8d0fb8ab | ||
|
891c8979a3 | ||
|
aa3989873c | ||
|
cb91afbd71 | ||
|
22b39e3fcd | ||
|
6d81d6d306 | ||
|
955fb6f315 | ||
|
8dd56bca05 | ||
|
f773878459 | ||
|
d28a0fd94b | ||
|
bba225dfe8 | ||
|
a1b3e596fc | ||
|
2fcba49433 | ||
|
049af3cef7 | ||
|
376e711f03 | ||
|
00d5242871 | ||
|
f7ddbcd733 | ||
|
da8cfdf179 | ||
|
4539eb69aa | ||
|
8bf1537054 | ||
|
d0c35146dd | ||
|
adad9d6405 | ||
|
2a84350cb2 | ||
|
d60f0b0e74 | ||
|
00074b9bfc | ||
|
206bebc7bd | ||
|
0eac7a0784 | ||
|
649dfa7292 | ||
|
bb1e308057 | ||
|
e1b74aeb1b | ||
|
d3d33c72bd | ||
|
87fa6ea71e | ||
|
36706a3dec | ||
|
cfd406861e | ||
|
bd90109c70 | ||
|
5a68ee0c87 | ||
|
dc199ebf5c | ||
|
75f35391fa | ||
|
7bde7a56f9 | ||
|
4d12aa2a9e | ||
|
a7ed3d56f9 | ||
|
b785a4b64e | ||
|
6e2aeda61d | ||
|
4949900863 | ||
|
776ee233bd | ||
|
1c3024fca7 | ||
|
d11b7f7754 | ||
|
f480209825 | ||
|
d15960f955 | ||
|
f3ca567159 | ||
|
d31f20758c | ||
|
154b8b9cdb | ||
|
1f71d76ac1 | ||
|
8c3e973b9f | ||
|
97f5dafbc5 | ||
|
957a820931 | ||
|
b4d397ff70 | ||
|
89013faf7d | ||
|
428c6c3c66 | ||
|
58c254ad3b | ||
|
a73b66f4d6 | ||
|
815dc180cc | ||
|
7d6881732d | ||
|
4602f4f475 | ||
|
b3ac1d176c | ||
|
d5aa3aef69 | ||
|
3ff2ef94e0 | ||
|
001dd47439 | ||
|
3cba984d22 | ||
|
82606a479a | ||
|
94292af51b | ||
|
f736da6fae | ||
|
fb66775ece | ||
|
8f962383c2 | ||
|
bb979e9e08 | ||
|
a12bab9eed | ||
|
b4659786cb | ||
|
7001fbaf49 | ||
|
d5d470cbc2 | ||
|
182567e434 | ||
|
9682f74fc5 | ||
|
17a3b4c9d8 | ||
|
73289324bd | ||
|
8ca1b90840 | ||
|
1c3c85d8ff | ||
|
d23fd2522c | ||
|
b58d8b099b | ||
|
545dc969d3 | ||
|
24e429969f | ||
|
e0be366258 | ||
|
be445759b6 | ||
|
db984d8a8b | ||
|
e251e358ff | ||
|
0c2099a852 | ||
|
fee5e269d0 | ||
|
2aace6c898 | ||
|
3ed193eee2 | ||
|
58e2b56d40 | ||
|
a61524bf77 | ||
|
36147a082d | ||
|
e6cb5fdc89 | ||
|
4bad1c140a | ||
|
5b80af978f | ||
|
ecf61f6fa7 | ||
|
254efc2812 | ||
|
84b93e0f8f | ||
|
79699131e8 | ||
|
f7c1b71939 | ||
|
7a7f8d5050 | ||
|
683c968d64 | ||
|
4c355ba308 | ||
|
35f6e62e45 | ||
|
932f20d434 | ||
|
e65155f440 | ||
|
7813f4564e | ||
|
4d15ffd2cf | ||
|
598ee5b51e | ||
|
257799be8e | ||
|
8e8028b786 | ||
|
ff7840d60f | ||
|
df7b91a2a3 | ||
|
7b2ac36264 | ||
|
46ac77590e | ||
|
6f731b20a9 | ||
|
8a6798a227 | ||
|
ae2eb2f1d1 | ||
|
cfef482366 | ||
|
75a0a779c0 | ||
|
6bb04d48ed | ||
|
6878eb26aa | ||
|
64f95b4990 | ||
|
66a6847fd0 | ||
|
7931f37a83 | ||
|
d175bab58e | ||
|
7c89712837 | ||
|
a14508d79b | ||
|
ca87562cab | ||
|
b964dcd936 | ||
|
81be72ea04 | ||
|
8423c52606 | ||
|
d01c462ad5 | ||
|
b2c8475b2c | ||
|
c4fceab7b3 | ||
|
cfe3dcfe6d | ||
|
a15e578158 | ||
|
c3a968193e | ||
|
6938f06125 | ||
|
0e3a79fd78 | ||
|
e58c867a82 | ||
|
d08d13f2c8 | ||
|
9574c17ddc | ||
|
06b299e627 | ||
|
1262cc982c | ||
|
487c692e68 | ||
|
4986119f1f | ||
|
bd58266b80 | ||
|
4973aaadf6 | ||
|
feb2a686d7 | ||
|
b6909942c8 | ||
|
6ac976b92e | ||
|
72eea1bd3d | ||
|
fffe4663cb | ||
|
a865b1073a | ||
|
d960e0049d | ||
|
bb36eb9eb8 | ||
|
12a90e2074 | ||
|
6408123330 | ||
|
6eaf0eaa56 | ||
|
191e5b0493 | ||
|
d5175aebcc | ||
|
d9ac019550 | ||
|
080e29365a | ||
|
c7e8ddf486 | ||
|
0eb4f6b267 | ||
|
491cb50219 | ||
|
2e5d2a88f3 | ||
|
1fecc4cfc1 | ||
|
0c08f791ef | ||
|
0bf5dbbc0b | ||
|
3ce94409ab | ||
|
ea58c8d2bc | ||
|
55ffac5bae | ||
|
12395fcf2d | ||
|
0f6fa8034b | ||
|
e904de2dc9 | ||
|
ef378663aa | ||
|
fac1f5cd88 | ||
|
7dbe106582 | ||
|
2032ed18c4 | ||
|
f67d2eb88a | ||
|
5ab1924c4f | ||
|
c8178e1fc4 | ||
|
ad2d4c7b1b | ||
|
1938446385 | ||
|
c9074facfe | ||
|
9f163ab7c6 | ||
|
98dafb61ae | ||
|
ea2b4d7506 | ||
|
f40f997405 | ||
|
4c5cf89725 | ||
|
a81acbe464 | ||
|
4e40e032b0 | ||
|
98a94855dc | ||
|
0c6ffbf5a4 | ||
|
3944ae68cb | ||
|
b34fa2d278 | ||
|
c5f586497f | ||
|
c9c2944e7c | ||
|
0116dde275 | ||
|
d4ae55733b | ||
|
4e1fa946b4 | ||
|
d127bf6e00 | ||
|
38e9c396cf | ||
|
0c4b498d4f | ||
|
d157816e07 | ||
|
f01729c86f | ||
|
0b67544f86 | ||
|
a3b064f4ee | ||
|
4a398a5b14 | ||
|
3ef0226a08 | ||
|
c3d9383523 | ||
|
deb9a7269e | ||
|
f3df283c4d | ||
|
206edaedf5 | ||
|
44ff2f2cf8 | ||
|
ccc20849ff | ||
|
609eed1791 | ||
|
b037d1b4d1 | ||
|
2b741b1c1b | ||
|
ef711cb30b | ||
|
4919c53c10 | ||
|
b347a9268a | ||
|
e76b0601b3 | ||
|
57b61c8787 | ||
|
7a7fa876d2 | ||
|
a6310cff1a | ||
|
84b5ffcc7c | ||
|
8d0ddb579f | ||
|
1dabd10e25 | ||
|
cee25d862d | ||
|
d4e4c3e89a | ||
|
f134808a26 | ||
|
a6a4502209 | ||
|
4722201281 | ||
|
4f7451895b | ||
|
8ff39f64f7 | ||
|
658391263e | ||
|
9056106c2d | ||
|
7533ef12e3 | ||
|
a41bb088f8 | ||
|
8203196145 | ||
|
563c2a345b | ||
|
ef5bd83bd0 | ||
|
408c2e5e91 | ||
|
f7f3ca0126 | ||
|
611fabe46c | ||
|
2aa52aa99a | ||
|
cf9558648e | ||
|
daef240cd2 | ||
|
5f37c72be0 | ||
|
fd52b9b9a4 | ||
|
920d00480d | ||
|
49d9dafaec | ||
|
2880524dfc | ||
|
e379019db2 | ||
|
44fb2c98bc | ||
|
382648fc22 | ||
|
9bda9e246a | ||
|
6634291c67 | ||
|
e55a88fb8e | ||
|
6a72c56cdd | ||
|
d21f8cebf6 | ||
|
7e183915a9 | ||
|
145bd10f4c | ||
|
b6a9baff94 | ||
|
143f90da60 | ||
|
47f52b5912 | ||
|
f97a3fa4d9 | ||
|
5f777d4126 | ||
|
e376805249 | ||
|
1cbe1a6f98 | ||
|
59dd49671d | ||
|
64582a64f1 | ||
|
547af0d0d2 | ||
|
69da0dd583 | ||
|
41df17bc46 | ||
|
0c92cf32d4 | ||
|
7273a05f02 | ||
|
d822d666c7 | ||
|
6cf9dfb7c9 | ||
|
3557e5ffd4 | ||
|
2172df9fa2 | ||
|
b9ec6a0eb4 | ||
|
0de5180ded | ||
|
f9ec88fb45 | ||
|
c04c0a5614 | ||
|
ae53adefad | ||
|
f421c45b21 | ||
|
cd30c25b08 | ||
|
e1b911fc1f | ||
|
09f3c1532a | ||
|
857e908929 | ||
|
f321f000c1 | ||
|
437afd67e0 | ||
|
ce353c1e4f | ||
|
0dc6c66840 | ||
|
d33808ea9e | ||
|
0c69148cff | ||
|
bab02bf190 | ||
|
f943f8d002 | ||
|
b3b0736761 | ||
|
cb6c931b1f | ||
|
07f49225d9 | ||
|
a6a1d553d9 | ||
|
39d6710798 | ||
|
a3c29f3a52 | ||
|
7a9bfa1087 | ||
|
7329b83cc0 | ||
|
360f953be8 | ||
|
0bf38e5c56 | ||
|
e6aef73a02 | ||
|
cf7e3eea56 | ||
|
3b91b1d260 | ||
|
409236e48e | ||
|
bb7f329e81 | ||
|
0175e13712 | ||
|
4323a11667 | ||
|
4f5a492dde | ||
|
3e1e96e477 | ||
|
a9cf1512e7 | ||
|
3178deb5a8 | ||
|
4b9f6f7e53 | ||
|
a786bbd4e0 | ||
|
078091752a | ||
|
586d707ae4 | ||
|
b3a7842448 | ||
|
dbe37cc302 | ||
|
52b90e0873 | ||
|
38b957398a | ||
|
752098e0fa | ||
|
99b86c0e1c | ||
|
b9fdd20f8f | ||
|
92b2bc5e11 | ||
|
64000a2526 | ||
|
4d05d0beff | ||
|
9707586ee8 | ||
|
52c59caf2f | ||
|
f0ec797f4b | ||
|
9e33a15b93 | ||
|
00a18a1cd1 | ||
|
14607c07f6 | ||
|
999d5dce40 | ||
|
c3b5b382ba | ||
|
18a8a51271 | ||
|
0325c2414a | ||
|
eb4ff7099f | ||
|
7591b10219 | ||
|
54045be951 | ||
|
3ac861a866 | ||
|
c5cbab1231 | ||
|
959dd937b4 | ||
|
79e3f7f204 | ||
|
a1237d90f1 | ||
|
28077155ca | ||
|
7a1180c80f | ||
|
ce72503df6 | ||
|
d55994643d | ||
|
11ea6aedfd | ||
|
52d3cce59d | ||
|
6cc4cf24dc | ||
|
1fcf67f14a | ||
|
f3896ed543 | ||
|
b86ee5778b | ||
|
43ec82179b | ||
|
cf6d94dc2a | ||
|
3e3481bd7a | ||
|
4976cd227e | ||
|
d32419ffcf | ||
|
7661a78a43 | ||
|
ed97ce8646 | ||
|
10f7b6f4f6 | ||
|
8e2353ad3e | ||
|
7e4807530e | ||
|
8b6eecea25 | ||
|
f8fd05f08f | ||
|
f957eea300 | ||
|
93eecdf79f | ||
|
3a57fc800b | ||
|
701fe3cfed | ||
|
11ce8b5dcd | ||
|
f5f76f111b | ||
|
bf4ea12719 | ||
|
235c084820 | ||
|
977c0db382 | ||
|
556bca58cf | ||
|
2cc89b767c | ||
|
1f6c2cd32c | ||
|
b6fab20601 | ||
|
74635fd752 | ||
|
38ca124de0 | ||
|
39a8346c53 | ||
|
d08b2616ef | ||
|
0a118310cb | ||
|
663729cf19 | ||
|
2ffb54c7c2 | ||
|
517c7f5c9b | ||
|
93620aa105 | ||
|
a4a328583a | ||
|
f91723d9e5 | ||
|
6254b8593e | ||
|
087e790ec1 | ||
|
4ce63c88aa | ||
|
a1bae7a9a8 | ||
|
08d16322e1 | ||
|
440adf2f3b | ||
|
a59793e8d6 | ||
|
7b46b97abd | ||
|
310160fd92 | ||
|
773eea196f | ||
|
e8420b9f39 | ||
|
ef8181478d | ||
|
eaea8e6640 | ||
|
e5729fdaac | ||
|
c8039d483b | ||
|
73d88dda46 | ||
|
ea0456ea08 | ||
|
9efdf24a6e | ||
|
a234392f80 | ||
|
b9102d7e87 | ||
|
0f2b55fbef | ||
|
69aa751f40 | ||
|
b3bf95bfdd |
@ -12,6 +12,6 @@ server {
|
||||
|
||||
location ~ \.php$ {
|
||||
include snippets/fastcgi-php.conf;
|
||||
fastcgi_pass 127.0.0.1:9000;
|
||||
fastcgi_pass unix:/var/run/php/php8.2-fpm.sock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
2
.gitattributes
vendored
2
.gitattributes
vendored
@ -47,8 +47,6 @@ phpcs.xml export-ignore
|
||||
phpcompatibility.xml export-ignore
|
||||
tests/ export-ignore
|
||||
cache/.gitkeep export-ignore
|
||||
bridges/DemoBridge.php export-ignore
|
||||
bridges/FeedExpanderExampleBridge.php export-ignore
|
||||
|
||||
## Composer
|
||||
#
|
||||
|
7
.github/.gitignore
vendored
Normal file
7
.github/.gitignore
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
# Visual Studio Code
|
||||
.vscode/*
|
||||
|
||||
# Generated files
|
||||
comment*.md
|
||||
comment*.txt
|
||||
*.html
|
4
.github/ISSUE_TEMPLATE/bridge-request.md
vendored
4
.github/ISSUE_TEMPLATE/bridge-request.md
vendored
@ -49,9 +49,9 @@ Please describe what you expect from the bridge. Whenever possible provide sampl
|
||||
- _Default limit_: 5
|
||||
- [ ] Load full articles
|
||||
- _Cache articles_ (articles are stored in a local cache on first request): yes
|
||||
- _Cache timeout_ (max = 24 hours): 24 hours
|
||||
- _Cache timeout_ : 24 hours
|
||||
- [X] Balance requests (RSS-Bridge uses cached versions to reduce bandwith usage)
|
||||
- _Timeout_ (default = 5 minutes, max = 24 hours): 5 minutes
|
||||
- _Timeout_ (default = 5 minutes): 5 minutes
|
||||
|
||||
<!--Be aware that some options might not be available for your specific request due to technical limitations!-->
|
||||
|
||||
|
289
.github/prtester.py
vendored
289
.github/prtester.py
vendored
@ -1,113 +1,208 @@
|
||||
import argparse
|
||||
import requests
|
||||
import itertools
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
import os.path
|
||||
from typing import Iterable
|
||||
import os
|
||||
import glob
|
||||
import urllib
|
||||
|
||||
# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge
|
||||
#
|
||||
# This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of
|
||||
# RSS-Bridge, generate a feed for each of the bridges and save the output as html files.
|
||||
# It also replaces the default static CSS link with a hardcoded link to @em92's public instance, so viewing
|
||||
# It also add a <base> tag with the url of em's public instance, so viewing
|
||||
# the HTML file locally will actually work as designed.
|
||||
|
||||
def testBridges(bridges,status):
|
||||
for bridge in bridges:
|
||||
if bridge.get('data-ref'): # Some div entries are empty, this ignores those
|
||||
bridgeid = bridge.get('id')
|
||||
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
|
||||
print(bridgeid + "\n")
|
||||
bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
|
||||
forms = bridge.find_all("form")
|
||||
formid = 1
|
||||
for form in forms:
|
||||
# a bridge can have multiple contexts, named 'forms' in html
|
||||
# this code will produce a fully working formstring that should create a working feed when called
|
||||
# this will create an example feed for every single context, to test them all
|
||||
formstring = ''
|
||||
errormessages = []
|
||||
parameters = form.find_all("input")
|
||||
lists = form.find_all("select")
|
||||
# this for/if mess cycles through all available input parameters, checks if it required, then pulls
|
||||
# the default or examplevalue and then combines it all together into the formstring
|
||||
# if an example or default value is missing for a required attribute, it will throw an error
|
||||
# any non-required fields are not tested!!!
|
||||
for parameter in parameters:
|
||||
if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
|
||||
cleanvalue = parameter.get('value').replace(" ","+")
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
|
||||
if parameter.get('type') == 'number' or parameter.get('type') == 'text':
|
||||
if parameter.has_attr('required'):
|
||||
if parameter.get('placeholder') == '':
|
||||
if parameter.get('value') == '':
|
||||
errormessages.append(parameter.get('name'))
|
||||
else:
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
|
||||
ARTIFACT_FILE_EXTENSION = '.html'
|
||||
|
||||
class Instance:
|
||||
name = ''
|
||||
url = ''
|
||||
|
||||
def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload: bool, title: str, output_file: str):
|
||||
start_date = datetime.now()
|
||||
|
||||
prid = os.getenv('PR')
|
||||
artifact_base_url = f'https://rss-bridge.github.io/rss-bridge-tests/prs/{prid}'
|
||||
artifact_directory = os.getcwd()
|
||||
for file in glob.glob(f'*{ARTIFACT_FILE_EXTENSION}', root_dir=artifact_directory):
|
||||
os.remove(file)
|
||||
|
||||
table_rows = []
|
||||
for instance in instances:
|
||||
page = requests.get(instance.url) # Use python requests to grab the rss-bridge main page
|
||||
soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
|
||||
bridge_cards = soup.select('.bridge-card') # get a soup-formatted list of all bridges on the rss-bridge page
|
||||
table_rows += testBridges(
|
||||
instance=instance,
|
||||
bridge_cards=bridge_cards,
|
||||
with_upload=with_upload,
|
||||
with_reduced_upload=with_reduced_upload,
|
||||
artifact_directory=artifact_directory,
|
||||
artifact_base_url=artifact_base_url) # run the main scraping code with the list of bridges
|
||||
with open(file=output_file, mode='w+', encoding='utf-8') as file:
|
||||
table_rows_value = '\n'.join(sorted(table_rows))
|
||||
file.write(f'''
|
||||
## {title}
|
||||
| Bridge | Context | Status |
|
||||
| - | - | - |
|
||||
{table_rows_value}
|
||||
|
||||
*last change: {start_date.strftime("%A %Y-%m-%d %H:%M:%S")}*
|
||||
'''.strip())
|
||||
|
||||
def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool, artifact_directory: str, artifact_base_url: str) -> Iterable:
|
||||
instance_suffix = ''
|
||||
if instance.name:
|
||||
instance_suffix = f' ({instance.name})'
|
||||
table_rows = []
|
||||
for bridge_card in bridge_cards:
|
||||
bridgeid = bridge_card.get('id')
|
||||
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
|
||||
print(f'{bridgeid}{instance_suffix}')
|
||||
bridge_name = bridgeid.replace('Bridge', '')
|
||||
context_forms = bridge_card.find_all("form")
|
||||
form_number = 1
|
||||
for context_form in context_forms:
|
||||
# a bridge can have multiple contexts, named 'forms' in html
|
||||
# this code will produce a fully working url that should create a working feed when called
|
||||
# this will create an example feed for every single context, to test them all
|
||||
context_parameters = {}
|
||||
error_messages = []
|
||||
context_name = '*untitled*'
|
||||
context_name_element = context_form.find_previous_sibling('h5')
|
||||
if context_name_element and context_name_element.text.strip() != '':
|
||||
context_name = context_name_element.text
|
||||
parameters = context_form.find_all("input")
|
||||
lists = context_form.find_all("select")
|
||||
# this for/if mess cycles through all available input parameters, checks if it required, then pulls
|
||||
# the default or examplevalue and then combines it all together into the url parameters
|
||||
# if an example or default value is missing for a required attribute, it will throw an error
|
||||
# any non-required fields are not tested!!!
|
||||
for parameter in parameters:
|
||||
parameter_type = parameter.get('type')
|
||||
parameter_name = parameter.get('name')
|
||||
if parameter_type == 'hidden':
|
||||
context_parameters[parameter_name] = parameter.get('value')
|
||||
if parameter_type == 'number' or parameter_type == 'text':
|
||||
if parameter.has_attr('required'):
|
||||
if parameter.get('placeholder') == '':
|
||||
if parameter.get('value') == '':
|
||||
error_messages.append(f'Missing example or default value for parameter "{parameter_name}"')
|
||||
else:
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
|
||||
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
|
||||
if parameter.get('type') == 'checkbox':
|
||||
if parameter.has_attr('checked'):
|
||||
formstring = formstring + '&' + parameter.get('name') + '=on'
|
||||
for listing in lists:
|
||||
selectionvalue = ''
|
||||
listname = listing.get('name')
|
||||
cleanlist = []
|
||||
for option in listing.contents:
|
||||
if 'optgroup' in option.name:
|
||||
cleanlist.extend(option)
|
||||
context_parameters[parameter_name] = parameter.get('value')
|
||||
else:
|
||||
cleanlist.append(option)
|
||||
firstselectionentry = 1
|
||||
for selectionentry in cleanlist:
|
||||
if firstselectionentry:
|
||||
context_parameters[parameter_name] = parameter.get('placeholder')
|
||||
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the url parameters
|
||||
if parameter_type == 'checkbox':
|
||||
if parameter.has_attr('checked'):
|
||||
context_parameters[parameter_name] = 'on'
|
||||
for listing in lists:
|
||||
selectionvalue = ''
|
||||
listname = listing.get('name')
|
||||
cleanlist = []
|
||||
options = listing.find_all('option')
|
||||
for option in options:
|
||||
if 'optgroup' in option.name:
|
||||
cleanlist.extend(option)
|
||||
else:
|
||||
cleanlist.append(option)
|
||||
firstselectionentry = 1
|
||||
for selectionentry in cleanlist:
|
||||
if firstselectionentry:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
firstselectionentry = 0
|
||||
else:
|
||||
if 'selected' in selectionentry.attrs:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
firstselectionentry = 0
|
||||
else:
|
||||
if 'selected' in selectionentry.attrs:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
break
|
||||
formstring = formstring + '&' + listname + '=' + selectionvalue
|
||||
if not errormessages:
|
||||
# if all example/default values are present, form the full request string, run the request, replace the static css
|
||||
# file with the url of em's public instance and then upload it to termpad.com, a pastebin-like-site.
|
||||
r = requests.get(URL + bridgestring + formstring)
|
||||
pagetext = r.text.replace('static/style.css','https://rss-bridge.org/bridge01/static/style.css')
|
||||
pagetext = pagetext.encode("utf_8")
|
||||
termpad = requests.post(url="https://termpad.com/", data=pagetext)
|
||||
termpadurl = termpad.text
|
||||
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
||||
termpadurl = termpadurl.replace('\n','')
|
||||
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
||||
file.write("\n")
|
||||
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
||||
break
|
||||
context_parameters[listname] = selectionvalue
|
||||
artifact_url = 'about:blank'
|
||||
if error_messages:
|
||||
status = '<br>'.join(map(lambda m: f'❌ `{m}`', error_messages))
|
||||
else:
|
||||
# if all example/default values are present, form the full request url, run the request, add a <base> tag with
|
||||
# the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and
|
||||
# then save it to a html file.
|
||||
context_parameters.update({
|
||||
'action': 'display',
|
||||
'bridge': bridgeid,
|
||||
'format': 'Html',
|
||||
})
|
||||
request_url = f'{instance.url}/?{urllib.parse.urlencode(context_parameters)}'
|
||||
response = requests.get(request_url)
|
||||
page_text = response.text.replace('<head>','<head><base href="https://rss-bridge.org/bridge01/" target="_blank">')
|
||||
page_text = page_text.encode("utf_8")
|
||||
soup = BeautifulSoup(page_text, "html.parser")
|
||||
status_messages = []
|
||||
if response.status_code != 200:
|
||||
status_messages += [f'❌ `HTTP status {response.status_code} {response.reason}`']
|
||||
else:
|
||||
# if there are errors (which means that a required value has no example or default value), log out which error appeared
|
||||
termpad = requests.post(url="https://termpad.com/", data=str(errormessages))
|
||||
termpadurl = termpad.text
|
||||
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
||||
termpadurl = termpadurl.replace('\n','')
|
||||
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
||||
file.write("\n")
|
||||
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
||||
formid += 1
|
||||
feed_items = soup.select('.feeditem')
|
||||
feed_items_length = len(feed_items)
|
||||
if feed_items_length <= 0:
|
||||
status_messages += [f'⚠️ `The feed has no items`']
|
||||
elif feed_items_length == 1 and len(soup.select('.error')) > 0:
|
||||
status_messages += [f'❌ `{getFirstLine(feed_items[0].text)}`']
|
||||
status_messages += map(lambda e: f'❌ `{getFirstLine(e.text)}`', soup.select('.error .error-type') + soup.select('.error .error-message'))
|
||||
for item_element in soup.select('.feeditem'): # remove all feed items to not accidentally selected <pre> tags from item content
|
||||
item_element.decompose()
|
||||
status_messages += map(lambda e: f'⚠️ `{getFirstLine(e.text)}`', soup.find_all('pre'))
|
||||
status_messages = list(dict.fromkeys(status_messages)) # remove duplicates
|
||||
status = '<br>'.join(status_messages)
|
||||
status_is_ok = status == '';
|
||||
if status_is_ok:
|
||||
status = '✔️'
|
||||
if with_upload and (not with_reduced_upload or not status_is_ok):
|
||||
filename = f'{bridge_name} {form_number}{instance_suffix}{ARTIFACT_FILE_EXTENSION}'
|
||||
filename = re.sub(r'[^a-z0-9 \_\-\.]', '', filename, flags=re.I).replace(' ', '_')
|
||||
with open(file=f'{artifact_directory}/{filename}', mode='wb') as file:
|
||||
file.write(page_text)
|
||||
artifact_url = f'{artifact_base_url}/{filename}'
|
||||
table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({artifact_url}) | {status} |')
|
||||
form_number += 1
|
||||
return table_rows
|
||||
|
||||
gitstatus = ["current", "pr"]
|
||||
now = datetime.now()
|
||||
date_time = now.strftime("%Y-%m-%d, %H:%M:%S")
|
||||
def getFirstLine(value: str) -> str:
|
||||
# trim whitespace and remove text that can break the table or is simply unnecessary
|
||||
clean_value = re.sub(r'^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip())
|
||||
first_line = next(iter(clean_value.splitlines()), '')
|
||||
max_length = 250
|
||||
if (len(first_line) > max_length):
|
||||
first_line = first_line[:max_length] + '...'
|
||||
return first_line
|
||||
|
||||
with open(os.getcwd() + '/comment.txt', 'w+') as file:
|
||||
file.write(''' ## Pull request artifacts
|
||||
| file | last change |
|
||||
| ---- | ------ |''')
|
||||
|
||||
for status in gitstatus: # run this twice, once for the current version, once for the PR version
|
||||
if status == "current":
|
||||
port = "3000" # both ports are defined in the corresponding workflow .yml file
|
||||
elif status == "pr":
|
||||
port = "3001"
|
||||
URL = "http://localhost:" + port
|
||||
page = requests.get(URL) # Use python requests to grab the rss-bridge main page
|
||||
soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
|
||||
bridges = soup.find_all("section") # get a soup-formatted list of all bridges on the rss-bridge page
|
||||
testBridges(bridges,status) # run the main scraping code with the list of bridges and the info if this is for the current version or the pr version
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--instances', nargs='+')
|
||||
parser.add_argument('--no-upload', action='store_true')
|
||||
parser.add_argument('--reduced-upload', action='store_true')
|
||||
parser.add_argument('--title', default='Pull request artifacts')
|
||||
parser.add_argument('--output-file', default=os.getcwd() + '/comment.txt')
|
||||
args = parser.parse_args()
|
||||
instances = []
|
||||
if args.instances:
|
||||
for instance_arg in args.instances:
|
||||
instance_arg_parts = instance_arg.split('::')
|
||||
instance = Instance()
|
||||
instance.name = instance_arg_parts[1].strip() if len(instance_arg_parts) >= 2 else ''
|
||||
instance.url = instance_arg_parts[0].strip().rstrip("/")
|
||||
instances.append(instance)
|
||||
else:
|
||||
instance = Instance()
|
||||
instance.name = 'current'
|
||||
instance.url = 'http://localhost:3000'
|
||||
instances.append(instance)
|
||||
instance = Instance()
|
||||
instance.name = 'pr'
|
||||
instance.url = 'http://localhost:3001'
|
||||
instances.append(instance)
|
||||
main(
|
||||
instances=instances,
|
||||
with_upload=not args.no_upload,
|
||||
with_reduced_upload=args.reduced_upload and not args.no_upload,
|
||||
title=args.title,
|
||||
output_file=args.output_file
|
||||
);
|
||||
|
12
.github/workflows/dockerbuild.yml
vendored
12
.github/workflows/dockerbuild.yml
vendored
@ -21,7 +21,7 @@ jobs:
|
||||
-
|
||||
name: Docker meta
|
||||
id: docker_meta
|
||||
uses: docker/metadata-action@v4
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
${{ env.DOCKERHUB_SLUG }}
|
||||
@ -33,26 +33,26 @@ jobs:
|
||||
type=raw,value=stable,enable=${{ startsWith(github.ref, 'refs/tags/20') }}
|
||||
-
|
||||
name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
-
|
||||
name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/bake-action@v2
|
||||
uses: docker/bake-action@v5
|
||||
with:
|
||||
files: |
|
||||
./docker-bake.hcl
|
||||
|
2
.github/workflows/documentation.yml
vendored
2
.github/workflows/documentation.yml
vendored
@ -9,7 +9,7 @@ jobs:
|
||||
documentation:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Setup PHP
|
||||
|
6
.github/workflows/lint.yml
vendored
6
.github/workflows/lint.yml
vendored
@ -13,7 +13,7 @@ jobs:
|
||||
matrix:
|
||||
php-versions: ['7.4']
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: ${{ matrix.php-versions }}
|
||||
@ -26,7 +26,7 @@ jobs:
|
||||
matrix:
|
||||
php-versions: ['7.4']
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: ${{ matrix.php-versions }}
|
||||
@ -38,7 +38,7 @@ jobs:
|
||||
executable_php_files_check:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- run: |
|
||||
if find -name "*.php" -executable -type f -print -exec false {} +
|
||||
then
|
||||
|
75
.github/workflows/prhtmlgenerator.yml
vendored
75
.github/workflows/prhtmlgenerator.yml
vendored
@ -5,24 +5,41 @@ on:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
check-bridges:
|
||||
name: Check if bridges were changed
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
BRIDGES: ${{ steps.check1.outputs.BRIDGES }}
|
||||
steps:
|
||||
- name: Check number of bridges
|
||||
id: check1
|
||||
run: |
|
||||
PR=${{github.event.number}};
|
||||
wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch;
|
||||
bridgeamount=$(cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq | wc -l);
|
||||
echo "BRIDGES=$bridgeamount" >> "$GITHUB_OUTPUT"
|
||||
test-pr:
|
||||
name: Generate HTML
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-bridges
|
||||
if: needs.check-bridges.outputs.BRIDGES > 0
|
||||
env:
|
||||
PYTHONUNBUFFERED: 1
|
||||
# Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989
|
||||
steps:
|
||||
- name: Check out self
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{github.event.pull_request.head.ref}}
|
||||
repository: ${{github.event.pull_request.head.repo.full_name}}
|
||||
- name: Check out rss-bridge
|
||||
run: |
|
||||
PR=${{github.event.number}};
|
||||
wget -O requirements.txt https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester-requirements.txt;
|
||||
wget https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester.py;
|
||||
wget -O requirements.txt https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester-requirements.txt;
|
||||
wget https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester.py;
|
||||
wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch;
|
||||
touch DEBUG;
|
||||
cat $PR.patch | grep " bridges/.*\.php" | sed "s= bridges/\(.*\)Bridge.php.*=\1=g" | sort | uniq > whitelist.txt
|
||||
cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt
|
||||
- name: Start Docker - Current
|
||||
run: |
|
||||
docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3000:80 ghcr.io/rss-bridge/rss-bridge:latest
|
||||
@ -31,9 +48,9 @@ jobs:
|
||||
docker build -t prbuild .;
|
||||
docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3001:80 prbuild
|
||||
- name: Setup python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.7'
|
||||
python-version: '3.13'
|
||||
cache: 'pip'
|
||||
- name: Install requirements
|
||||
run: |
|
||||
@ -49,9 +66,17 @@ jobs:
|
||||
body="${body//$'\n'/'%0A'}";
|
||||
body="${body//$'\r'/'%0D'}";
|
||||
echo "bodylength=${#body}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
PR: ${{ github.event.number }}
|
||||
- name: Upload generated tests
|
||||
uses: actions/upload-artifact@v4
|
||||
id: upload-generated-tests
|
||||
with:
|
||||
name: tests
|
||||
path: '*.html'
|
||||
- name: Find Comment
|
||||
if: ${{ steps.testrun.outputs.bodylength > 130 }}
|
||||
uses: peter-evans/find-comment@v2
|
||||
uses: peter-evans/find-comment@v3
|
||||
id: fc
|
||||
with:
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
@ -59,9 +84,43 @@ jobs:
|
||||
body-includes: Pull request artifacts
|
||||
- name: Create or update comment
|
||||
if: ${{ steps.testrun.outputs.bodylength > 130 }}
|
||||
uses: peter-evans/create-or-update-comment@v2
|
||||
uses: peter-evans/create-or-update-comment@v4
|
||||
with:
|
||||
comment-id: ${{ steps.fc.outputs.comment-id }}
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
body-file: comment.txt
|
||||
edit-mode: replace
|
||||
upload_tests:
|
||||
name: Upload tests
|
||||
runs-on: ubuntu-latest
|
||||
needs: test-pr
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: 'RSS-Bridge/rss-bridge-tests'
|
||||
ref: 'main'
|
||||
token: ${{ secrets.RSSTESTER_ACTION }}
|
||||
|
||||
- name: Setup git config
|
||||
run: |
|
||||
git config --global user.name "GitHub Actions"
|
||||
git config --global user.email "<>"
|
||||
|
||||
- name: Download tests
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: tests
|
||||
|
||||
- name: Move tests
|
||||
run: |
|
||||
cd prs
|
||||
mkdir -p ${{github.event.number}}
|
||||
cd ${{github.event.number}}
|
||||
mv -f $GITHUB_WORKSPACE/*.html .
|
||||
|
||||
- name: Commit and push generated tests
|
||||
run: |
|
||||
export COMMIT_MESSAGE="Added tests for PR ${{github.event.number}}"
|
||||
git add .
|
||||
git commit -m "$COMMIT_MESSAGE"
|
||||
git push
|
||||
|
4
.github/workflows/tests.yml
vendored
4
.github/workflows/tests.yml
vendored
@ -13,9 +13,11 @@ jobs:
|
||||
matrix:
|
||||
php-versions: ['7.4', '8.0', '8.1']
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: ${{ matrix.php-versions }}
|
||||
env:
|
||||
update: true
|
||||
- run: composer install
|
||||
- run: composer test
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -6,7 +6,6 @@ data/
|
||||
*.pydevproject
|
||||
.project
|
||||
.metadata
|
||||
bin/
|
||||
tmp/
|
||||
*.tmp
|
||||
*.bak
|
||||
@ -230,6 +229,9 @@ pip-log.txt
|
||||
DEBUG
|
||||
config.ini.php
|
||||
config/*
|
||||
!config/nginx.conf
|
||||
!config/php-fpm.conf
|
||||
!config/php.ini
|
||||
|
||||
######################
|
||||
## VisualStudioCode ##
|
||||
|
@ -15,7 +15,7 @@
|
||||
* [Astalaseven](https://github.com/Astalaseven)
|
||||
* [Astyan-42](https://github.com/Astyan-42)
|
||||
* [austinhuang0131](https://github.com/austinhuang0131)
|
||||
* [AxorPL](https://github.com/AxorPL)
|
||||
* [axor-mst](https://github.com/axor-mst)
|
||||
* [ayacoo](https://github.com/ayacoo)
|
||||
* [az5he6ch](https://github.com/az5he6ch)
|
||||
* [b1nj](https://github.com/b1nj)
|
||||
@ -23,6 +23,7 @@
|
||||
* [Binnette](https://github.com/Binnette)
|
||||
* [BoboTiG](https://github.com/BoboTiG)
|
||||
* [Bockiii](https://github.com/Bockiii)
|
||||
* [brtsos](https://github.com/brtsos)
|
||||
* [captn3m0](https://github.com/captn3m0)
|
||||
* [chemel](https://github.com/chemel)
|
||||
* [Chouchen](https://github.com/Chouchen)
|
||||
@ -144,6 +145,7 @@
|
||||
* [Niehztog](https://github.com/Niehztog)
|
||||
* [NikNikYkt](https://github.com/NikNikYkt)
|
||||
* [Nono-m0le](https://github.com/Nono-m0le)
|
||||
* [NotsoanoNimus](https://github.com/NotsoanoNimus)
|
||||
* [obsiwitch](https://github.com/obsiwitch)
|
||||
* [Ololbu](https://github.com/Ololbu)
|
||||
* [ORelio](https://github.com/ORelio)
|
||||
|
78
Dockerfile
78
Dockerfile
@ -1,36 +1,72 @@
|
||||
FROM lwthiker/curl-impersonate:0.5-ff-slim-buster AS curlimpersonate
|
||||
|
||||
FROM php:8.0.27-fpm-buster AS rssbridge
|
||||
FROM debian:12-slim AS rssbridge
|
||||
|
||||
LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one."
|
||||
LABEL repository="https://github.com/RSS-Bridge/rss-bridge"
|
||||
LABEL website="https://github.com/RSS-Bridge/rss-bridge"
|
||||
|
||||
RUN apt-get update && \
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
RUN set -xe && \
|
||||
apt-get update && \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
ca-certificates \
|
||||
nginx \
|
||||
zlib1g-dev \
|
||||
libzip-dev \
|
||||
libmemcached-dev \
|
||||
nss-plugin-pem \
|
||||
libicu-dev && \
|
||||
docker-php-ext-install zip && \
|
||||
docker-php-ext-install intl && \
|
||||
pecl install memcached && \
|
||||
docker-php-ext-enable memcached && \
|
||||
docker-php-ext-enable opcache && \
|
||||
mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini"
|
||||
|
||||
COPY ./config/nginx.conf /etc/nginx/sites-enabled/default
|
||||
|
||||
COPY --chown=www-data:www-data ./ /app/
|
||||
|
||||
COPY --from=curlimpersonate /usr/local/lib/libcurl-impersonate-ff.so /usr/local/lib/curl-impersonate/
|
||||
php-curl \
|
||||
php-fpm \
|
||||
php-intl \
|
||||
# php-json is enabled by default with PHP 8.2 in Debian 12
|
||||
php-mbstring \
|
||||
php-memcached \
|
||||
# php-opcache is enabled by default with PHP 8.2 in Debian 12
|
||||
# php-openssl is enabled by default with PHP 8.2 in Debian 12
|
||||
php-sqlite3 \
|
||||
php-xml \
|
||||
php-zip \
|
||||
# php-zlib is enabled by default with PHP 8.2 in Debian 12
|
||||
# for downloading libcurl-impersonate
|
||||
curl \
|
||||
&& \
|
||||
# install curl-impersonate library
|
||||
curlimpersonate_version=0.6.0 && \
|
||||
{ \
|
||||
{ \
|
||||
[ $(arch) = 'aarch64' ] && \
|
||||
archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \
|
||||
sha512sum="d04b1eabe71f3af06aa1ce99b39a49c5e1d33b636acedcd9fad163bc58156af5c3eb3f75aa706f335515791f7b9c7a6c40ffdfa47430796483ecef929abd905d" \
|
||||
; } \
|
||||
|| { \
|
||||
[ $(arch) = 'armv7l' ] && \
|
||||
archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \
|
||||
sha512sum="05906b4efa1a6ed8f3b716fd83d476b6eea6bfc68e3dbc5212d65a2962dcaa7bd1f938c9096a7535252b11d1d08fb93adccc633585ff8cb8cec5e58bfe969bc9" \
|
||||
; } \
|
||||
|| { \
|
||||
[ $(arch) = 'x86_64' ] && \
|
||||
archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \
|
||||
sha512sum="480bbe9452cd9aff2c0daaaf91f1057b3a96385f79011628a9237223757a9b0d090c59cb5982dc54ea0d07191657299ea91ca170a25ced3d7d410fcdff130ace" \
|
||||
; } \
|
||||
} && \
|
||||
curl -LO "https://github.com/lwthiker/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \
|
||||
echo "$sha512sum $archive" | sha512sum -c - && \
|
||||
mkdir -p /usr/local/lib/curl-impersonate && \
|
||||
tar xaf "$archive" -C /usr/local/lib/curl-impersonate --wildcards 'libcurl-impersonate-ff.so*' && \
|
||||
rm "$archive" && \
|
||||
apt-get purge --assume-yes curl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so
|
||||
|
||||
ENV CURL_IMPERSONATE ff91esr
|
||||
|
||||
# logs should go to stdout / stderr
|
||||
RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \
|
||||
ln -sfT /dev/stdout /var/log/nginx/access.log; \
|
||||
chown -R --no-dereference www-data:adm /var/log/nginx/
|
||||
|
||||
COPY ./config/nginx.conf /etc/nginx/sites-available/default
|
||||
COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf
|
||||
COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini
|
||||
|
||||
COPY --chown=www-data:www-data ./ /app/
|
||||
|
||||
EXPOSE 80
|
||||
|
||||
ENTRYPOINT ["/app/docker-entrypoint.sh"]
|
||||
|
422
README.md
422
README.md
@ -2,12 +2,25 @@
|
||||
|
||||

|
||||
|
||||
RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one.
|
||||
RSS-Bridge is a PHP web application.
|
||||
|
||||
It generates web feeds for websites that don't have one.
|
||||
|
||||
Officially hosted instance: https://rss-bridge.org/bridge01/
|
||||
|
||||
IRC channel #rssbridge at https://libera.chat/
|
||||
|
||||
[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html)
|
||||
|
||||
Alternatively find another
|
||||
[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html).
|
||||
|
||||
Requires minimum PHP 7.4.
|
||||
|
||||
|
||||
[](UNLICENSE)
|
||||
[](https://github.com/rss-bridge/rss-bridge/releases/latest)
|
||||
[](https://web.libera.chat/#rssbridge)
|
||||
[](https://matrix.to/#/#rssbridge:libera.chat)
|
||||
[](https://github.com/RSS-Bridge/rss-bridge/actions)
|
||||
|
||||
|||
|
||||
@ -15,116 +28,219 @@ RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for website
|
||||
|||
|
||||
|||
|
||||
|||
|
||||
|||
|
||||
|
||||
## A subset of bridges
|
||||
## A subset of bridges (15/447)
|
||||
|
||||
* `YouTube` : YouTube user channel, playlist or search
|
||||
* `Twitter` : Return keyword/hashtag search or user timeline
|
||||
* `Telegram` : Return the latest posts from a public group
|
||||
* `Reddit` : Return the latest posts from a subreddit or user
|
||||
* `Filter` : Filter an existing feed url
|
||||
* `Vk` : Latest posts from a user or group
|
||||
* `FeedMerge` : Merge two or more existing feeds into one
|
||||
* `Twitch` : Fetch the latest videos from a channel
|
||||
* `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords
|
||||
|
||||
And [many more](bridges/), thanks to the community!
|
||||
|
||||
[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html)
|
||||
|
||||
Check out RSS-Bridge right now on https://rss-bridge.org/bridge01 or find another
|
||||
[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html).
|
||||
* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge)
|
||||
* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge)
|
||||
* `FeedReducerBridge`: [Reduce a noisy feed by some percentage](https://rss-bridge.org/bridge01/#bridge-FeedReducerBridge)
|
||||
* `FilterBridge`: [Filter a feed by excluding/including items by keyword](https://rss-bridge.org/bridge01/#bridge-FilterBridge)
|
||||
* `GettrBridge`: [Fetches the latest posts from a GETTR user](https://rss-bridge.org/bridge01/#bridge-GettrBridge)
|
||||
* `MastodonBridge`: [Fetches statuses from a Mastodon (ActivityPub) instance](https://rss-bridge.org/bridge01/#bridge-MastodonBridge)
|
||||
* `RedditBridge`: [Fetches posts from a user/subreddit (with filtering options)](https://rss-bridge.org/bridge01/#bridge-RedditBridge)
|
||||
* `RumbleBridge`: [Fetches channel/user videos](https://rss-bridge.org/bridge01/#bridge-RumbleBridge)
|
||||
* `SoundcloudBridge`: [Fetches music by username](https://rss-bridge.org/bridge01/#bridge-SoundcloudBridge)
|
||||
* `TelegramBridge`: [Fetches posts from a public channel](https://rss-bridge.org/bridge01/#bridge-TelegramBridge)
|
||||
* `ThePirateBayBridge`: [Fetches torrents by search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge)
|
||||
* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge)
|
||||
* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge)
|
||||
* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge)
|
||||
* `YoutubeBridge`: [Fetches videos by username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge)
|
||||
* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's community tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge)
|
||||
|
||||
## Tutorial
|
||||
|
||||
RSS-Bridge requires php 7.4 (or higher).
|
||||
### How to install on traditional shared web hosting
|
||||
|
||||
### Install with composer or git
|
||||
RSS-Bridge can basically be unzipped into a web folder and should work instantly.
|
||||
|
||||
Latest zip:
|
||||
https://github.com/RSS-Bridge/rss-bridge/archive/refs/heads/master.zip (2MB)
|
||||
|
||||
### How to install on Debian 12 (nginx + php-fpm)
|
||||
|
||||
These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month).
|
||||
|
||||
```shell
|
||||
timedatectl set-timezone Europe/Oslo
|
||||
|
||||
apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl
|
||||
|
||||
# Create a user account
|
||||
useradd --shell /bin/bash --create-home rss-bridge
|
||||
|
||||
cd /var/www
|
||||
composer create-project --no-dev rss-bridge/rss-bridge
|
||||
|
||||
# Create folder and change its ownership to rss-bridge
|
||||
mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/
|
||||
|
||||
# Become rss-bridge
|
||||
su rss-bridge
|
||||
|
||||
# Clone master branch into existing folder
|
||||
git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/
|
||||
cd rss-bridge
|
||||
|
||||
# Copy over the default config (OPTIONAL)
|
||||
cp -v config.default.ini.php config.ini.php
|
||||
|
||||
# Recursively give full permissions to user/owner
|
||||
chmod 700 --recursive ./
|
||||
|
||||
# Give read and execute to others on folders ./ and ./static
|
||||
chmod o+rx ./ ./static
|
||||
|
||||
# Recursively give read to others on folder ./static
|
||||
chmod o+r --recursive ./static
|
||||
```
|
||||
|
||||
```shell
|
||||
cd /var/www
|
||||
git clone https://github.com/RSS-Bridge/rss-bridge.git
|
||||
```
|
||||
|
||||
Config:
|
||||
|
||||
```shell
|
||||
# Give the http user write permission to the cache folder
|
||||
chown www-data:www-data /var/www/rss-bridge/cache
|
||||
|
||||
# Optionally copy over the default config file
|
||||
cp config.default.ini.php config.ini.php
|
||||
```
|
||||
|
||||
Example config for nginx:
|
||||
Nginx config:
|
||||
|
||||
```nginx
|
||||
# /etc/nginx/sites-enabled/rssbridge
|
||||
# /etc/nginx/sites-enabled/rss-bridge.conf
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
server_name example.com;
|
||||
root /var/www/rss-bridge;
|
||||
index index.php;
|
||||
|
||||
location ~ \.php$ {
|
||||
# TODO: change to your own server name
|
||||
server_name example.com;
|
||||
|
||||
access_log /var/log/nginx/rss-bridge.access.log;
|
||||
error_log /var/log/nginx/rss-bridge.error.log;
|
||||
log_not_found off;
|
||||
|
||||
# Intentionally not setting a root folder
|
||||
|
||||
# Static content only served here
|
||||
location /static/ {
|
||||
alias /var/www/rss-bridge/static/;
|
||||
}
|
||||
|
||||
# Pass off to php-fpm only when location is EXACTLY == /
|
||||
location = / {
|
||||
root /var/www/rss-bridge/;
|
||||
include snippets/fastcgi-php.conf;
|
||||
fastcgi_read_timeout 60s;
|
||||
fastcgi_pass unix:/run/php/php-fpm.sock;
|
||||
fastcgi_read_timeout 45s;
|
||||
fastcgi_pass unix:/run/php/rss-bridge.sock;
|
||||
}
|
||||
|
||||
# Reduce log noise
|
||||
location = /favicon.ico {
|
||||
access_log off;
|
||||
}
|
||||
|
||||
# Reduce log noise
|
||||
location = /robots.txt {
|
||||
access_log off;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Install with Docker:
|
||||
PHP FPM pool config:
|
||||
```ini
|
||||
; /etc/php/8.2/fpm/pool.d/rss-bridge.conf
|
||||
|
||||
Install by using docker image from Docker Hub:
|
||||
[rss-bridge]
|
||||
|
||||
user = rss-bridge
|
||||
group = rss-bridge
|
||||
|
||||
listen = /run/php/rss-bridge.sock
|
||||
|
||||
listen.owner = www-data
|
||||
listen.group = www-data
|
||||
|
||||
; Create 10 workers standing by to serve requests
|
||||
pm = static
|
||||
pm.max_children = 10
|
||||
|
||||
; Respawn worker after 500 requests (workaround for memory leaks etc.)
|
||||
pm.max_requests = 500
|
||||
```
|
||||
|
||||
PHP ini config:
|
||||
```ini
|
||||
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
|
||||
|
||||
max_execution_time = 15
|
||||
memory_limit = 64M
|
||||
```
|
||||
|
||||
Restart fpm and nginx:
|
||||
|
||||
```shell
|
||||
# Lint and restart php-fpm
|
||||
php-fpm8.2 -t && systemctl restart php8.2-fpm
|
||||
|
||||
# Lint and restart nginx
|
||||
nginx -t && systemctl restart nginx
|
||||
```
|
||||
|
||||
### How to install from Composer
|
||||
|
||||
Install the latest release.
|
||||
|
||||
```shell
|
||||
cd /var/www
|
||||
composer create-project -v --no-dev --no-scripts rss-bridge/rss-bridge
|
||||
```
|
||||
|
||||
### How to install with Caddy
|
||||
|
||||
TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785
|
||||
|
||||
### Install from Docker Hub:
|
||||
|
||||
Install by downloading the docker image from Docker Hub:
|
||||
|
||||
```bash
|
||||
# Create container
|
||||
docker create --name=rss-bridge --publish 3000:80 rssbridge/rss-bridge
|
||||
docker create --name=rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rssbridge/rss-bridge
|
||||
```
|
||||
|
||||
You can put custom `config.ini.php` and bridges into `./config`.
|
||||
|
||||
**You must restart the container for custom changes to take effect.**
|
||||
|
||||
See `docker-entrypoint.sh` for details.
|
||||
|
||||
```bash
|
||||
# Start container
|
||||
docker start rss-bridge
|
||||
```
|
||||
|
||||
Browse http://localhost:3000/
|
||||
|
||||
Install by locally building the image:
|
||||
### Install by locally building from Dockerfile
|
||||
|
||||
```bash
|
||||
# Build image from Dockerfile
|
||||
docker build -t rss-bridge .
|
||||
|
||||
# Create container
|
||||
docker create --name rss-bridge --publish 3000:80 rss-bridge
|
||||
docker create --name rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rss-bridge
|
||||
```
|
||||
|
||||
# Start the container
|
||||
You can put custom `config.ini.php` and bridges into `./config`.
|
||||
|
||||
**You must restart the container for custom changes to take effect.**
|
||||
|
||||
See `docker-entrypoint.sh` for details.
|
||||
|
||||
```bash
|
||||
# Start container
|
||||
docker start rss-bridge
|
||||
```
|
||||
|
||||
Browse http://localhost:3000/
|
||||
|
||||
#### Install with docker-compose
|
||||
### Install with docker-compose (using Docker Hub)
|
||||
|
||||
Create a `docker-compose.yml` file locally with the following content:
|
||||
```yml
|
||||
version: '2'
|
||||
services:
|
||||
rss-bridge:
|
||||
image: rssbridge/rss-bridge:latest
|
||||
volumes:
|
||||
- </local/custom/path>:/config
|
||||
ports:
|
||||
- 3000:80
|
||||
restart: unless-stopped
|
||||
```
|
||||
You can put custom `config.ini.php` and bridges into `./config`.
|
||||
|
||||
Then launch with `docker-compose`:
|
||||
**You must restart the container for custom changes to take effect.**
|
||||
|
||||
See `docker-entrypoint.sh` for details.
|
||||
|
||||
```bash
|
||||
docker-compose up
|
||||
@ -132,13 +248,14 @@ docker-compose up
|
||||
|
||||
Browse http://localhost:3000/
|
||||
|
||||
### Alternative installation methods
|
||||
### Other installation methods
|
||||
|
||||
[](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge)
|
||||
[](https://heroku.com/deploy)
|
||||
[](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html)
|
||||
[](https://www.pikapods.com/pods?run=rssbridge)
|
||||
|
||||
The Heroku quick deploy currently does not work. It might possibly work if you fork this repo and
|
||||
The Heroku quick deploy currently does not work. It might work if you fork this repo and
|
||||
modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688
|
||||
|
||||
Learn more in
|
||||
@ -146,6 +263,64 @@ Learn more in
|
||||
|
||||
## How-to
|
||||
|
||||
### How to fix "Access denied."
|
||||
|
||||
Output is from php-fpm. It is unable to read index.php.
|
||||
|
||||
chown rss-bridge:rss-bridge /var/www/rss-bridge/index.php
|
||||
|
||||
### How to password-protect the instance (token)
|
||||
|
||||
Modify `config.ini.php`:
|
||||
|
||||
[authentication]
|
||||
|
||||
token = "hunter2"
|
||||
|
||||
### How to remove all cache items
|
||||
|
||||
As current user:
|
||||
|
||||
bin/cache-clear
|
||||
|
||||
As user rss-bridge:
|
||||
|
||||
sudo -u rss-bridge bin/cache-clear
|
||||
|
||||
As root:
|
||||
|
||||
sudo bin/cache-clear
|
||||
|
||||
### How to remove all expired cache items
|
||||
|
||||
bin/cache-prune
|
||||
|
||||
### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable"
|
||||
|
||||
```shell
|
||||
# Give rss-bridge ownership
|
||||
chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or, give www-data ownership
|
||||
chown www-data:www-data -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or, give everyone write permission
|
||||
chmod 777 -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or last ditch effort (CAREFUL)
|
||||
rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/
|
||||
```
|
||||
|
||||
### How to fix "attempt to write a readonly database"
|
||||
|
||||
The sqlite files (db, wal and shm) are not writeable.
|
||||
|
||||
chown -v rss-bridge:rss-bridge cache/*
|
||||
|
||||
### How to fix "Unable to prepare statement: 1, no such table: storage"
|
||||
|
||||
rm cache/*
|
||||
|
||||
### How to create a new bridge from scratch
|
||||
|
||||
Create the new bridge in e.g. `bridges/BearBlogBridge.php`:
|
||||
@ -186,13 +361,92 @@ enabled_bridges[] = GettrBridge
|
||||
|
||||
### How to enable debug mode
|
||||
|
||||
The
|
||||
[debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html)
|
||||
disables the majority of caching operations.
|
||||
|
||||
enable_debug_mode = true
|
||||
|
||||
Learn more in [debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html).
|
||||
### How to switch to memcached as cache backend
|
||||
|
||||
```
|
||||
[cache]
|
||||
|
||||
; Cache backend: file (default), sqlite, memcached, null
|
||||
type = "memcached"
|
||||
```
|
||||
|
||||
### How to switch to sqlite3 as cache backend
|
||||
|
||||
type = "sqlite"
|
||||
|
||||
### How to disable bridge errors (as feed items)
|
||||
|
||||
When a bridge fails, RSS-Bridge will produce a feed with a single item describing the error.
|
||||
|
||||
This way, feed readers pick it up and you are notified.
|
||||
|
||||
If you don't want this behaviour, switch the error output to `http`:
|
||||
|
||||
[error]
|
||||
|
||||
; Defines how error messages are returned by RSS-Bridge
|
||||
;
|
||||
; "feed" = As part of the feed (default)
|
||||
; "http" = As HTTP error message
|
||||
; "none" = No errors are reported
|
||||
output = "http"
|
||||
|
||||
### How to accumulate errors before finally reporting them
|
||||
|
||||
Modify `report_limit` so that an error must occur 3 times before it is reported.
|
||||
|
||||
; Defines how often an error must occur before it is reported to the user
|
||||
report_limit = 3
|
||||
|
||||
The report count is reset to 0 each day.
|
||||
|
||||
### How to password-protect the instance (HTTP Basic Auth)
|
||||
|
||||
[authentication]
|
||||
|
||||
enable = true
|
||||
username = "alice"
|
||||
password = "cat"
|
||||
|
||||
This will typically require feed readers to be configured with the credentials.
|
||||
|
||||
It may also be possible to manually include the credentials in the URL:
|
||||
|
||||
https://alice:cat@rss-bridge.org/bridge01/?action=display&bridge=FabriceBellardBridge&format=Html
|
||||
|
||||
### How to create a new output format
|
||||
|
||||
[Create a new format](https://rss-bridge.github.io/rss-bridge/Format_API/index.html).
|
||||
See `formats/PlaintextFormat.php` for an example.
|
||||
|
||||
### How to run unit tests and linter
|
||||
|
||||
These commands require that you have installed the dev dependencies in `composer.json`.
|
||||
|
||||
Run all tests:
|
||||
|
||||
./vendor/bin/phpunit
|
||||
|
||||
Run a single test class:
|
||||
|
||||
./vendor/bin/phpunit --filter UrlTest
|
||||
|
||||
Run linter:
|
||||
|
||||
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
|
||||
|
||||
https://github.com/squizlabs/PHP_CodeSniffer/wiki
|
||||
|
||||
### How to spawn a minimal development environment
|
||||
|
||||
php -S 127.0.0.1:9001
|
||||
|
||||
http://127.0.0.1:9001/
|
||||
|
||||
## Explanation
|
||||
|
||||
@ -205,15 +459,18 @@ webmaster of
|
||||
See [CONTRIBUTORS.md](CONTRIBUTORS.md)
|
||||
|
||||
RSS-Bridge uses caching to prevent services from banning your server for repeatedly updating feeds.
|
||||
The specific cache duration can be different between bridges. Cached files are deleted automatically after 24 hours.
|
||||
The specific cache duration can be different between bridges.
|
||||
|
||||
RSS-Bridge allows you to take full control over which bridges are displayed to the user.
|
||||
That way you can host your own RSS-Bridge service with your favorite collection of bridges!
|
||||
|
||||
Current maintainers (as of 2024): @dvikan and @Mynacol #2519
|
||||
|
||||
## Reference
|
||||
|
||||
### FeedItem properties
|
||||
### Feed item structure
|
||||
|
||||
This is the feed item structure that bridges are expected to produce.
|
||||
|
||||
```php
|
||||
$item = [
|
||||
@ -236,13 +493,22 @@ That way you can host your own RSS-Bridge service with your favorite collection
|
||||
]
|
||||
```
|
||||
|
||||
### Output formats:
|
||||
### Output formats
|
||||
|
||||
* `Atom` : Atom feed, for use in feed readers
|
||||
* `Html` : Simple HTML page
|
||||
* `Json` : JSON, for consumption by other applications
|
||||
* `Mrss` : MRSS feed, for use in feed readers
|
||||
* `Plaintext` : Raw text, for consumption by other applications
|
||||
* `Atom`: Atom feed, for use in feed readers
|
||||
* `Html`: Simple HTML page
|
||||
* `Json`: JSON, for consumption by other applications
|
||||
* `Mrss`: MRSS feed, for use in feed readers
|
||||
* `Plaintext`: Raw text, for consumption by other applications
|
||||
* `Sfeed`: Text, TAB separated
|
||||
|
||||
### Cache backends
|
||||
|
||||
* `File`
|
||||
* `SQLite`
|
||||
* `Memcached`
|
||||
* `Array`
|
||||
* `Null`
|
||||
|
||||
### Licenses
|
||||
|
||||
|
@ -1,17 +1,5 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
/**
|
||||
* Checks if the website for a given bridge is reachable.
|
||||
*
|
||||
@ -26,23 +14,26 @@ class ConnectivityAction implements ActionInterface
|
||||
{
|
||||
private BridgeFactory $bridgeFactory;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->bridgeFactory = new BridgeFactory();
|
||||
public function __construct(
|
||||
BridgeFactory $bridgeFactory
|
||||
) {
|
||||
$this->bridgeFactory = $bridgeFactory;
|
||||
}
|
||||
|
||||
public function execute(array $request)
|
||||
public function __invoke(Request $request): Response
|
||||
{
|
||||
if (!Debug::isEnabled()) {
|
||||
throw new \Exception('This action is only available in debug mode!');
|
||||
return new Response('This action is only available in debug mode!', 403);
|
||||
}
|
||||
|
||||
if (!isset($request['bridge'])) {
|
||||
return render_template('connectivity.html.php');
|
||||
$bridgeName = $request->get('bridge');
|
||||
if (!$bridgeName) {
|
||||
return new Response(render_template('connectivity.html.php'));
|
||||
}
|
||||
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
|
||||
if (!$bridgeClassName) {
|
||||
return new Response('Bridge not found', 404);
|
||||
}
|
||||
|
||||
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($request['bridge']);
|
||||
|
||||
return $this->reportBridgeConnectivity($bridgeClassName);
|
||||
}
|
||||
|
||||
@ -52,29 +43,25 @@ class ConnectivityAction implements ActionInterface
|
||||
throw new \Exception('Bridge is not whitelisted!');
|
||||
}
|
||||
|
||||
$retVal = [
|
||||
'bridge' => $bridgeClassName,
|
||||
'successful' => false,
|
||||
'http_code' => 200,
|
||||
];
|
||||
|
||||
$bridge = $this->bridgeFactory->create($bridgeClassName);
|
||||
$curl_opts = [
|
||||
CURLOPT_CONNECTTIMEOUT => 5
|
||||
CURLOPT_CONNECTTIMEOUT => 5,
|
||||
CURLOPT_FOLLOWLOCATION => true,
|
||||
];
|
||||
$result = [
|
||||
'bridge' => $bridgeClassName,
|
||||
'successful' => false,
|
||||
'http_code' => null,
|
||||
];
|
||||
try {
|
||||
$reply = getContents($bridge::URI, [], $curl_opts, true);
|
||||
|
||||
if ($reply['code'] === 200) {
|
||||
$retVal['successful'] = true;
|
||||
if (strpos(implode('', $reply['status_lines']), '301 Moved Permanently')) {
|
||||
$retVal['http_code'] = 301;
|
||||
}
|
||||
$response = getContents($bridge::URI, [], $curl_opts, true);
|
||||
$result['http_code'] = $response->getCode();
|
||||
if (in_array($result['http_code'], [200])) {
|
||||
$result['successful'] = true;
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
$retVal['successful'] = false;
|
||||
}
|
||||
|
||||
return new Response(Json::encode($retVal), 200, ['Content-Type' => 'text/json']);
|
||||
return new Response(Json::encode($result), 200, ['content-type' => 'text/json']);
|
||||
}
|
||||
}
|
||||
|
@ -1,53 +1,51 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
/**
 * Redirects to the display action of the first enabled bridge that is able to
 * derive its parameters from a user supplied url.
 */
class DetectAction implements ActionInterface
{
    private BridgeFactory $bridgeFactory;

    public function __construct(
        BridgeFactory $bridgeFactory
    ) {
        $this->bridgeFactory = $bridgeFactory;
    }

    /**
     * Reads the `url` and `format` request parameters and asks every enabled
     * bridge, in the order the factory lists them, to detect its parameters
     * from the url. The first bridge that returns a non-empty result wins.
     *
     * @return Response A 301 redirect to `?action=display&...` on success,
     *                  a 400 error page when `url` or `format` is missing,
     *                  or a 404 error page when no bridge matched.
     */
    public function __invoke(Request $request): Response
    {
        $url = $request->get('url');
        $format = $request->get('format');

        if (!$url) {
            return $this->createErrorResponse('You must specify a url', 400);
        }
        if (!$format) {
            return $this->createErrorResponse('You must specify a format', 400);
        }

        foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
            if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
                continue;
            }

            $bridge = $this->bridgeFactory->create($bridgeClassName);

            $bridgeParams = $bridge->detectParameters($url);

            // Any empty result (null, empty array, ...) means "not my url"
            if (!$bridgeParams) {
                continue;
            }

            $query = [
                'action' => 'display',
                'bridge' => $bridgeClassName,
                'format' => $format,
            ];
            $query = array_merge($query, $bridgeParams);
            return new Response('', 301, ['location' => '?' . http_build_query($query)]);
        }

        return $this->createErrorResponse('No bridge found for given URL: ' . $url, 404);
    }

    /**
     * Renders the generic error template with the given message.
     *
     * The status code is passed explicitly so that failures are not reported
     * to HTTP clients with the default (success) status.
     */
    private function createErrorResponse(string $message, int $code): Response
    {
        return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => $message]), $code);
    }
}
|
||||
|
@ -1,44 +1,43 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class DisplayAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
private CacheInterface $cache;
|
||||
private Logger $logger;
|
||||
private BridgeFactory $bridgeFactory;
|
||||
|
||||
/**
 * Stores the collaborators of this action.
 *
 * @param CacheInterface $cache         Cache used by this action
 * @param Logger         $logger        Logger used by this action
 * @param BridgeFactory  $bridgeFactory Factory used to resolve and create bridges
 */
public function __construct(
    CacheInterface $cache,
    Logger $logger,
    BridgeFactory $bridgeFactory
) {
    $this->bridgeFactory = $bridgeFactory;
    $this->logger = $logger;
    $this->cache = $cache;
}
|
||||
|
||||
public function __invoke(Request $request): Response
|
||||
{
|
||||
if (Configuration::getConfig('system', 'enable_maintenance_mode')) {
|
||||
return new Response('503 Service Unavailable', 503);
|
||||
$bridgeName = $request->get('bridge');
|
||||
$format = $request->get('format');
|
||||
$noproxy = $request->get('_noproxy');
|
||||
|
||||
if (!$bridgeName) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge name parameter']), 400);
|
||||
}
|
||||
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
|
||||
if (!$bridgeClassName) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404);
|
||||
}
|
||||
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
|
||||
$bridgeClassName = $bridgeFactory->createBridgeClassName($request['bridge'] ?? '');
|
||||
|
||||
$format = $request['format'] ?? null;
|
||||
if (!$format) {
|
||||
throw new \Exception('You must specify a format!');
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']), 400);
|
||||
}
|
||||
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
throw new \Exception('This bridge is not whitelisted');
|
||||
if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400);
|
||||
}
|
||||
|
||||
$formatFactory = new FormatFactory();
|
||||
$format = $formatFactory->create($format);
|
||||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
$bridge->loadConfiguration();
|
||||
|
||||
$noproxy = $request['_noproxy'] ?? null;
|
||||
// Disable proxy (if enabled and per user's request)
|
||||
if (
|
||||
Configuration::getConfig('proxy', 'url')
|
||||
&& Configuration::getConfig('proxy', 'by_bridge')
|
||||
@ -48,176 +47,135 @@ class DisplayAction implements ActionInterface
|
||||
define('NOPROXY', true);
|
||||
}
|
||||
|
||||
$cacheTimeout = $request['_cache_timeout'] ?? null;
|
||||
if (Configuration::getConfig('cache', 'custom_timeout') && $cacheTimeout) {
|
||||
$cacheTimeout = (int) $cacheTimeout;
|
||||
} else {
|
||||
// At this point the query argument might still be in the url but it won't be used
|
||||
$cacheTimeout = $bridge->getCacheTimeout();
|
||||
$cacheKey = 'http_' . json_encode($request->toArray());
|
||||
|
||||
$bridge = $this->bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$response = $this->createResponse($request, $bridge, $format);
|
||||
|
||||
if ($response->getCode() === 200) {
|
||||
$ttl = $request->get('_cache_timeout');
|
||||
if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) {
|
||||
$ttl = (int) $ttl;
|
||||
} else {
|
||||
$ttl = $bridge->getCacheTimeout();
|
||||
}
|
||||
$this->cache->set($cacheKey, $response, $ttl);
|
||||
}
|
||||
|
||||
// Remove parameters that don't concern bridges
|
||||
$bridge_params = array_diff_key(
|
||||
$request,
|
||||
array_fill_keys(
|
||||
[
|
||||
'action',
|
||||
'bridge',
|
||||
'format',
|
||||
'_noproxy',
|
||||
'_cache_timeout',
|
||||
'_error_time'
|
||||
],
|
||||
''
|
||||
)
|
||||
);
|
||||
|
||||
// Remove parameters that don't concern caches
|
||||
$cache_params = array_diff_key(
|
||||
$request,
|
||||
array_fill_keys(
|
||||
[
|
||||
'action',
|
||||
'format',
|
||||
'_noproxy',
|
||||
'_cache_timeout',
|
||||
'_error_time'
|
||||
],
|
||||
''
|
||||
)
|
||||
);
|
||||
|
||||
$cache = RssBridge::getCache();
|
||||
$cache->setScope('');
|
||||
$cache->setKey($cache_params);
|
||||
// This cache purge will basically delete all cache items older than 24h, regardless of scope and key
|
||||
$cache->purgeCache(86400);
|
||||
|
||||
$items = [];
|
||||
$infos = [];
|
||||
$mtime = $cache->getTime();
|
||||
|
||||
if (
|
||||
$mtime
|
||||
&& (time() - $cacheTimeout < $mtime)
|
||||
&& !Debug::isEnabled()
|
||||
) {
|
||||
// At this point we found the feed in the cache and debug mode is disabled
|
||||
|
||||
if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) {
|
||||
// The client wants to know if the feed has changed since its last check
|
||||
$stime = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']);
|
||||
if ($mtime <= $stime) {
|
||||
$lastModified2 = gmdate('D, d M Y H:i:s ', $mtime) . 'GMT';
|
||||
return new Response('', 304, ['Last-Modified' => $lastModified2]);
|
||||
}
|
||||
}
|
||||
|
||||
// Load the feed from cache and prepare it
|
||||
$cached = $cache->loadData();
|
||||
if (isset($cached['items']) && isset($cached['extraInfos'])) {
|
||||
foreach ($cached['items'] as $item) {
|
||||
$items[] = new FeedItem($item);
|
||||
}
|
||||
$infos = $cached['extraInfos'];
|
||||
}
|
||||
} else {
|
||||
// At this point we did NOT find the feed in the cache or debug mode is enabled.
|
||||
try {
|
||||
$bridge->setDatas($bridge_params);
|
||||
$bridge->collectData();
|
||||
|
||||
$items = $bridge->getItems();
|
||||
|
||||
if (isset($items[0]) && is_array($items[0])) {
|
||||
$feedItems = [];
|
||||
foreach ($items as $item) {
|
||||
$feedItems[] = new FeedItem($item);
|
||||
}
|
||||
$items = $feedItems;
|
||||
}
|
||||
$infos = [
|
||||
'name' => $bridge->getName(),
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'icon' => $bridge->getIcon()
|
||||
];
|
||||
} catch (\Throwable $e) {
|
||||
if ($e instanceof HttpException) {
|
||||
// Produce a smaller log record for http exceptions
|
||||
Logger::warning(sprintf('Exception in %s: %s', $bridgeClassName, create_sane_exception_message($e)));
|
||||
} else {
|
||||
// Log the exception
|
||||
Logger::error(sprintf('Exception in %s', $bridgeClassName), ['e' => $e]);
|
||||
}
|
||||
|
||||
// Emit error only if we are passed the error report limit
|
||||
$errorCount = self::logBridgeError($bridge->getName(), $e->getCode());
|
||||
if ($errorCount >= Configuration::getConfig('error', 'report_limit')) {
|
||||
if (Configuration::getConfig('error', 'output') === 'feed') {
|
||||
// Emit the error as a feed item in a feed so that feed readers can pick it up
|
||||
$items[] = $this->createFeedItemFromException($e, $bridge);
|
||||
} elseif (Configuration::getConfig('error', 'output') === 'http') {
|
||||
// Emit as a regular web response
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unfortunately need to set scope and key again because they might be modified
|
||||
$cache->setScope('');
|
||||
$cache->setKey($cache_params);
|
||||
$cache->saveData([
|
||||
'items' => array_map(function (FeedItem $item) {
|
||||
return $item->toArray();
|
||||
}, $items),
|
||||
'extraInfos' => $infos
|
||||
]);
|
||||
}
|
||||
|
||||
$format->setItems($items);
|
||||
$format->setExtraInfos($infos);
|
||||
$lastModified = $cache->getTime();
|
||||
$format->setLastModified($lastModified);
|
||||
$headers = [];
|
||||
if ($lastModified) {
|
||||
$headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $lastModified) . 'GMT';
|
||||
}
|
||||
$headers['Content-Type'] = $format->getMimeType() . '; charset=' . $format->getCharset();
|
||||
return new Response($format->stringify(), 200, $headers);
|
||||
return $response;
|
||||
}
|
||||
|
||||
private function createFeedItemFromException($e, BridgeInterface $bridge): FeedItem
|
||||
/**
 * Runs the bridge with the bridge-specific request parameters and renders
 * the collected items in the requested format.
 *
 * Failure handling while the bridge is collecting data:
 * - RateLimitException: logged at info level, answered with 429.
 * - HttpException with code 429 or 503: logged at debug level, answered with that same code.
 * - Any other HttpException is not logged; every other throwable is logged as an error.
 * - For those remaining failures the error is counted (only when the report
 *   limit is above 1). Once the count reaches the limit the error is emitted
 *   as a single feed item ('feed'), as a 500 page ('http'), or not at all.
 *
 * @param string $format Name of the output format, resolved through FormatFactory
 */
private function createResponse(Request $request, BridgeAbstract $bridge, string $format)
{
    $items = [];

    try {
        $bridge->loadConfiguration();

        // Request parameters that are not meant for the bridge itself
        $ignoredKeys = array_fill_keys([
            'token',
            'action',
            'bridge',
            'format',
            '_noproxy',
            '_cache_timeout',
            '_error_time',
            '_', // Some RSS readers add a cache-busting parameter (_=<timestamp>) to feed URLs, detect and ignore them.
        ], '');

        $bridge->setInput(array_diff_key($request->toArray(), $ignoredKeys));
        $bridge->collectData();
        $items = $bridge->getItems();
    } catch (\Throwable $e) {
        if ($e instanceof RateLimitException) {
            // These are internally generated by bridges
            $this->logger->info(sprintf('RateLimitException in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
            return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 429);
        }

        $isHttpException = $e instanceof HttpException;

        if ($isHttpException && in_array($e->getCode(), [429, 503])) {
            // Log with debug, immediately reproduce and return
            $this->logger->debug(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
            return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), $e->getCode());
        }

        if (!$isHttpException) {
            // HttpExceptions with other status codes fail normally without a log record;
            // everything else is logged as an error
            $this->logger->error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]);
        }

        $errorOutput = Configuration::getConfig('error', 'output');
        $reportLimit = Configuration::getConfig('error', 'report_limit');

        // Counting is skipped when the very first error is already reported
        $errorCount = $reportLimit > 1
            ? $this->logBridgeError($bridge->getName(), $e->getCode())
            : 1;

        // Let clients know about the error if we are past the report limit
        if ($errorCount >= $reportLimit) {
            if ($errorOutput === 'feed') {
                // Render the exception as a feed item
                $items = [$this->createFeedItemFromException($e, $bridge)];
            } elseif ($errorOutput === 'http') {
                return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 500);
            }
            // 'none' (and any other value): do nothing, which produces an empty feed
        }
    }

    $formatter = (new FormatFactory())->create($format);

    $formatter->setItems($items);
    $formatter->setFeed($bridge->getFeed());

    $now = time();
    $formatter->setLastModified($now);

    $headers = [
        'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT',
        'content-type' => $formatter->getMimeType() . '; charset=UTF-8',
    ];

    // This is supposed to remove non-utf8 byte sequences, but I'm unsure if it works
    ini_set('mbstring.substitute_character', 'none');
    $body = mb_convert_encoding($formatter->render(), 'UTF-8', 'UTF-8');

    return new Response($body, 200, $headers);
}
|
||||
|
||||
/**
 * Describes an exception as a feed item (plain array) so that feed readers
 * can display the failure.
 *
 * @param \Throwable $e The failure to report
 * @return array Item with the keys title, uri, timestamp, uid and content
 */
private function createFeedItemFromException($e, BridgeAbstract $bridge): array
{
    // Number of whole days since the unix epoch: changes once every 24 hours
    $dayStamp = urlencode((int)(time() / 86400));

    $feedItem = [
        'title' => sprintf('Bridge returned error %s! (%s)', $e->getCode(), $dayStamp),
        'uri' => get_current_url(),
        'timestamp' => time(),
        // Item identifier for feed readers e.g. "staysafetv twitch videos_19389"
        'uid' => $bridge->getName() . '_' . $dayStamp,
    ];

    $feedItem['content'] = render_template(__DIR__ . '/../templates/bridge-error.html.php', [
        'error' => render_template(__DIR__ . '/../templates/exception.html.php', ['e' => $e]),
        'searchUrl' => self::createGithubSearchUrl($bridge),
        'issueUrl' => self::createGithubIssueUrl($bridge, $e),
        'maintainer' => $bridge->getMaintainer(),
    ]);

    return $feedItem;
}
|
||||
|
||||
private static function logBridgeError($bridgeName, $code)
|
||||
private function logBridgeError($bridgeName, $code)
|
||||
{
|
||||
$cache = RssBridge::getCache();
|
||||
$cache->setScope('error_reporting');
|
||||
$cache->setkey([$bridgeName . '_' . $code]);
|
||||
|
||||
if ($report = $cache->loadData()) {
|
||||
// todo: it's not really necessary to json encode $report
|
||||
$cacheKey = 'error_reporting_' . $bridgeName . '_' . $code;
|
||||
$report = $this->cache->get($cacheKey);
|
||||
if ($report) {
|
||||
$report = Json::decode($report);
|
||||
$report['time'] = time();
|
||||
$report['count']++;
|
||||
@ -228,26 +186,39 @@ class DisplayAction implements ActionInterface
|
||||
'count' => 1,
|
||||
];
|
||||
}
|
||||
$cache->saveData(Json::encode($report));
|
||||
$ttl = 86400 * 5;
|
||||
$this->cache->set($cacheKey, Json::encode($report), $ttl);
|
||||
return $report['count'];
|
||||
}
|
||||
|
||||
/**
 * Builds the url of a pre-filled "new issue" form on the RSS-Bridge GitHub
 * repository for a bridge that failed.
 *
 * The bridge's maintainer string may hold several comma separated names.
 * All of them are mentioned in the issue body, the first one is used as assignee.
 *
 * @param BridgeAbstract $bridge The bridge that failed
 * @param \Throwable     $e      The failure; its message and trace go into the issue
 */
private static function createGithubIssueUrl(BridgeAbstract $bridge, \Throwable $e): string
{
    // explode() yields a one-element list when there is no comma, so a single
    // maintainer needs no special case
    $maintainers = array_map('trim', explode(',', $bridge->getMaintainer()));

    $queryString = $_SERVER['QUERY_STRING'] ?? '';
    $query = [
        'title' => $bridge->getName() . ' failed with: ' . $e->getMessage(),
        'body' => sprintf(
            "```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```\nMaintainer: @%s",
            create_sane_exception_message($e),
            implode("\n", trace_to_call_points(trace_from_exception($e))),
            $queryString,
            Configuration::getVersion(),
            PHP_OS_FAMILY,
            phpversion() ?: 'Unknown',
            implode(', @', $maintainers),
        ),
        'labels' => 'Bridge-Broken',
        // Index the list of names: indexing the raw maintainer string would
        // only give its first character
        'assignee' => $maintainers[0],
    ];

    return 'https://github.com/RSS-Bridge/rss-bridge/issues/new?' . http_build_query($query);
}
|
||||
|
||||
private static function createGithubSearchUrl($bridge): string
|
||||
|
95
actions/FindfeedAction.php
Normal file
95
actions/FindfeedAction.php
Normal file
@ -0,0 +1,95 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This action is used by the frontpage form search.
|
||||
* It finds a bridge based off of a user input url.
|
||||
* It uses bridges' detectParameters implementation.
|
||||
*/
|
||||
class FindfeedAction implements ActionInterface
{
    private BridgeFactory $bridgeFactory;

    public function __construct(
        BridgeFactory $bridgeFactory
    ) {
        $this->bridgeFactory = $bridgeFactory;
    }

    /**
     * Collects, for every enabled bridge that detects parameters in the `url`
     * request parameter, a feed url plus descriptive data about the bridge.
     *
     * @return Response JSON list of matches (200), a JSON message when nothing
     *                  matched (404), or a plain text message when `url` or
     *                  `format` is missing (400).
     */
    public function __invoke(Request $request): Response
    {
        $url = $request->get('url');
        $format = $request->get('format');

        if (!$url) {
            return new Response('You must specify a url', 400);
        }
        if (!$format) {
            return new Response('You must specify a format', 400);
        }

        $results = [];
        foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
            if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
                continue;
            }

            $bridge = $this->bridgeFactory->create($bridgeClassName);

            $bridgeParams = $bridge->detectParameters($url);

            if ($bridgeParams === null) {
                continue;
            }

            // It's allowed to have no 'context' in a bridge (only a default context without any name)
            // In this case, the reference to the parameters are found in the first element of the PARAMETERS array
            $context = $bridgeParams['context'] ?? 0;

            // Construct the array of parameters
            $bridgeData = [];
            foreach ($bridgeParams as $key => $value) {
                // 'context' is a special case: it's a bridge parameter without a "name" of its own.
                // Compared strictly so that an integer key can never be mistaken for it.
                $bridgeData[$key] = [
                    'name' => $key === 'context'
                        ? 'Context'
                        : $this->getParameterName($bridge, $context, $key),
                    'value' => $value,
                ];
            }

            $bridgeParams['bridge'] = $bridgeClassName;
            $bridgeParams['format'] = $format;
            $results[] = [
                'url' => './?action=display&' . http_build_query($bridgeParams),
                'bridgeParams' => $bridgeParams,
                'bridgeData' => $bridgeData,
                'bridgeMeta' => [
                    'name' => $bridge::NAME,
                    'description' => $bridge::DESCRIPTION,
                    'parameters' => $bridge::PARAMETERS,
                    'icon' => $bridge->getIcon(),
                ],
            ];
        }

        if ($results === []) {
            return new Response(Json::encode(['message' => 'No bridge found for given url']), 404, ['content-type' => 'application/json']);
        }
        return new Response(Json::encode($results), 200, ['content-type' => 'application/json']);
    }

    /**
     * Looks up the display name of a parameter: first in the given context of
     * the bridge's PARAMETERS, then in its 'global' section. Falls back to a
     * placeholder text that contains the parameter key.
     */
    private function getParameterName($bridge, $context, $key)
    {
        if (isset($bridge::PARAMETERS[$context][$key]['name'])) {
            return $bridge::PARAMETERS[$context][$key]['name'];
        }
        if (isset($bridge::PARAMETERS['global'][$key]['name'])) {
            return $bridge::PARAMETERS['global'][$key]['name'];
        }
        return 'Variable "' . $key . '" (No name provided)';
    }
}
|
@ -2,35 +2,48 @@
|
||||
|
||||
/**
 * Renders the frontpage: one card per enabled bridge plus a warning for every
 * bridge the factory reports as enabled but missing.
 */
final class FrontpageAction implements ActionInterface
{
    private BridgeFactory $bridgeFactory;

    public function __construct(
        BridgeFactory $bridgeFactory
    ) {
        $this->bridgeFactory = $bridgeFactory;
    }

    /**
     * @return Response The rendered frontpage template
     */
    public function __invoke(Request $request): Response
    {
        $token = $request->getAttribute('token');

        $allBridges = $this->bridgeFactory->getBridgeClassNames();

        $warnings = [];
        foreach ($this->bridgeFactory->getMissingEnabledBridges() as $missingBridge) {
            $warnings[] = [
                'body' => sprintf('Warning : Bridge "%s" not found', $missingBridge),
                'level' => 'warning'
            ];
        }

        $cards = '';
        $enabledCount = 0;
        foreach ($allBridges as $bridgeClassName) {
            if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
                continue;
            }
            $cards .= BridgeCard::render($this->bridgeFactory, $bridgeClassName, $token);
            $enabledCount++;
        }

        // TODO: The rendered template could be cached, but beware config changes that changes the html
        return new Response(render(__DIR__ . '/../templates/frontpage.html.php', [
            'messages' => $warnings,
            'admin_email' => Configuration::getConfig('admin', 'email'),
            'admin_telegram' => Configuration::getConfig('admin', 'telegram'),
            'bridges' => $cards,
            'active_bridges' => $enabledCount,
            'total_bridges' => count($allBridges),
        ]));
    }
}
|
||||
|
@ -4,7 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
class HealthAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
public function __invoke(Request $request): Response
|
||||
{
|
||||
$response = [
|
||||
'code' => 200,
|
||||
|
@ -1,42 +1,36 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
/**
 * Answers with a JSON document that describes every bridge known to the factory.
 */
class ListAction implements ActionInterface
{
    private BridgeFactory $bridgeFactory;

    public function __construct(
        BridgeFactory $bridgeFactory
    ) {
        $this->bridgeFactory = $bridgeFactory;
    }

    /**
     * @return Response JSON object with `bridges` (keyed by bridge class name)
     *                  and `total` (number of bridges)
     */
    public function __invoke(Request $request): Response
    {
        $bridges = [];
        foreach ($this->bridgeFactory->getBridgeClassNames() as $className) {
            $instance = $this->bridgeFactory->create($className);

            $bridges[$className] = [
                'status' => $this->bridgeFactory->isEnabled($className) ? 'active' : 'inactive',
                'uri' => $instance->getURI(),
                'donationUri' => $instance->getDonationURI(),
                'name' => $instance->getName(),
                'icon' => $instance->getIcon(),
                'parameters' => $instance->getParameters(),
                'maintainer' => $instance->getMaintainer(),
                'description' => $instance->getDescription()
            ];
        }

        // Property order (bridges, total) determines the key order of the JSON output
        $list = new \stdClass();
        $list->bridges = $bridges;
        $list->total = count($bridges);

        return new Response(Json::encode($list), 200, ['content-type' => 'application/json']);
    }
}
|
||||
|
@ -1,50 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class SetBridgeCacheAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
{
|
||||
$authenticationMiddleware = new ApiAuthenticationMiddleware();
|
||||
$authenticationMiddleware($request);
|
||||
|
||||
$key = $request['key'] or returnClientError('You must specify key!');
|
||||
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
|
||||
$bridgeClassName = $bridgeFactory->createBridgeClassName($request['bridge'] ?? '');
|
||||
|
||||
// whitelist control
|
||||
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
throw new \Exception('This bridge is not whitelisted', 401);
|
||||
die;
|
||||
}
|
||||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
$bridge->loadConfiguration();
|
||||
$value = $request['value'];
|
||||
|
||||
$cache = RssBridge::getCache();
|
||||
$cache->setScope(get_class($bridge));
|
||||
if (!is_array($key)) {
|
||||
// not sure if $key is an array when it comes in from request
|
||||
$key = [$key];
|
||||
}
|
||||
$cache->setKey($key);
|
||||
$cache->saveData($value);
|
||||
|
||||
header('Content-Type: text/plain');
|
||||
echo 'done';
|
||||
}
|
||||
}
|
16
bin/cache-clear
Executable file
16
bin/cache-clear
Executable file
@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env php
<?php

/**
 * Command line script: removes all items from the cache.
 */

require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';

// dependencies.php returns the container that holds the shared services
$container = require __DIR__ . '/../lib/dependencies.php';

/** @var CacheInterface $cache */
$cache = $container['cache'];
$cache->clear();
|
24
bin/cache-prune
Executable file
24
bin/cache-prune
Executable file
@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env php
<?php

/**
 * Command line script: removes all expired items from the cache.
 */

require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';

$container = require __DIR__ . '/../lib/dependencies.php';

if (Configuration::getConfig('cache', 'type') === 'file') {
    if (!Configuration::getConfig('FileCache', 'enable_purge')) {
        // Override enable_purge for this particular execution.
        // This happens before the cache is taken from the container.
        Configuration::setConfig('FileCache', 'enable_purge', true);
    }
}

/** @var CacheInterface $cache */
$cache = $container['cache'];
$cache->prune();
|
20
bin/test
Executable file
20
bin/test
Executable file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env php
<?php

/**
 * Command line script: writes one log record at each of the debug, info and
 * error levels (for testing purposes).
 */

require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';

$container = require __DIR__ . '/../lib/dependencies.php';

/** @var Logger $logger */
$logger = $container['logger'];

// Logger method name => message, emitted in this order
$records = [
    'debug' => 'This is a test debug message',
    'info' => 'This is a test info message',
    'error' => 'This is a test error message',
];

foreach ($records as $level => $message) {
    $logger->{$level}($message);
}
|
@ -31,17 +31,17 @@ class ABCNewsBridge extends BridgeAbstract
|
||||
{
|
||||
$url = sprintf('https://www.abc.net.au/news/%s', $this->getInput('topic'));
|
||||
$dom = getSimpleHTMLDOM($url);
|
||||
$dom = $dom->find('div[data-component="CardList"]', 0);
|
||||
$dom = $dom->find('div[data-component="PaginationList"]', 0);
|
||||
if (!$dom) {
|
||||
throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
|
||||
}
|
||||
$dom = defaultLinkTo($dom, $this->getURI());
|
||||
foreach ($dom->find('div[data-component="GenericCard"]') as $article) {
|
||||
foreach ($dom->find('article[data-component="DetailCard"]') as $article) {
|
||||
$a = $article->find('a', 0);
|
||||
$this->items[] = [
|
||||
'title' => $a->plaintext,
|
||||
'uri' => $a->href,
|
||||
'content' => $article->find('[data-component="CardDescription"]', 0)->plaintext,
|
||||
'content' => $article->find('p', 0)->plaintext,
|
||||
'timestamp' => strtotime($article->find('time', 0)->datetime),
|
||||
];
|
||||
}
|
||||
|
@ -12,9 +12,29 @@ class AO3Bridge extends BridgeAbstract
|
||||
'url' => [
|
||||
'name' => 'url',
|
||||
'required' => true,
|
||||
// Example: F/F tag, complete works only
|
||||
'exampleValue' => 'https://archiveofourown.org/works?work_search[complete]=T&tag_id=F*s*F',
|
||||
// Example: F/F tag
|
||||
'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works',
|
||||
],
|
||||
'range' => [
|
||||
'name' => 'Chapter Content',
|
||||
'title' => 'Chapter(s) to include in each work\'s feed entry',
|
||||
'defaultValue' => null,
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'None' => null,
|
||||
'First' => 'first',
|
||||
'Latest' => 'last',
|
||||
'Entire work' => 'all',
|
||||
],
|
||||
],
|
||||
'unique' => [
|
||||
'name' => 'Make separate entries for new fic chapters',
|
||||
'type' => 'checkbox',
|
||||
'required' => false,
|
||||
'title' => 'Make separate entries for new fic chapters',
|
||||
'defaultValue' => 'checked',
|
||||
],
|
||||
'limit' => self::LIMIT,
|
||||
],
|
||||
'Bookmarks' => [
|
||||
'user' => [
|
||||
@ -33,23 +53,19 @@ class AO3Bridge extends BridgeAbstract
|
||||
],
|
||||
]
|
||||
];
|
||||
private $title;
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
switch ($this->queriedContext) {
|
||||
case 'Bookmarks':
|
||||
$user = $this->getInput('user');
|
||||
$this->title = $user;
|
||||
$url = self::URI
|
||||
. '/users/' . $user
|
||||
. '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
|
||||
$this->collectList($url);
|
||||
$this->collectList($this->getURI());
|
||||
break;
|
||||
case 'List':
|
||||
$this->collectList($this->getInput('url'));
|
||||
$this->collectList($this->getURI());
|
||||
break;
|
||||
case 'Work':
|
||||
$this->collectWork($this->getInput('id'));
|
||||
$this->collectWork($this->getURI());
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -60,9 +76,24 @@ class AO3Bridge extends BridgeAbstract
|
||||
*/
|
||||
private function collectList($url)
|
||||
{
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$version = 'v0.0.1';
|
||||
$headers = [
|
||||
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"
|
||||
];
|
||||
$response = getContents($url, $headers);
|
||||
|
||||
$html = \str_get_html($response);
|
||||
$html = defaultLinkTo($html, self::URI);
|
||||
|
||||
// Get list title. Will include page range + count in some cases
|
||||
$heading = ($html->find('#main h2', 0));
|
||||
if ($heading->find('a.tag')) {
|
||||
$heading = $heading->find('a.tag', 0);
|
||||
}
|
||||
$this->title = $heading->plaintext;
|
||||
|
||||
$limit = $this->getInput('limit') ?? 3;
|
||||
$count = 0;
|
||||
foreach ($html->find('.index.group > li') as $element) {
|
||||
$item = [];
|
||||
|
||||
@ -71,16 +102,70 @@ class AO3Bridge extends BridgeAbstract
|
||||
continue; // discard deleted works
|
||||
}
|
||||
$item['title'] = $title->plaintext;
|
||||
$item['content'] = $element;
|
||||
$item['uri'] = $title->href;
|
||||
|
||||
$strdate = $element->find('div p.datetime', 0)->plaintext;
|
||||
$item['timestamp'] = strtotime($strdate);
|
||||
|
||||
// detach from rest of page because remove() is buggy
|
||||
$element = str_get_html($element->outertext());
|
||||
$tags = $element->find('ul.required-tags', 0);
|
||||
foreach ($tags->childNodes() as $tag) {
|
||||
$item['categories'][] = html_entity_decode($tag->plaintext);
|
||||
}
|
||||
$tags->remove();
|
||||
$tags = $element->find('ul.tags', 0);
|
||||
foreach ($tags->childNodes() as $tag) {
|
||||
$item['categories'][] = html_entity_decode($tag->plaintext);
|
||||
}
|
||||
$tags->remove();
|
||||
|
||||
$item['content'] = implode('', $element->childNodes());
|
||||
|
||||
$chapters = $element->find('dl dd.chapters', 0);
|
||||
// bookmarked series and external works do not have a chapters count
|
||||
$chapters = (isset($chapters) ? $chapters->plaintext : 0);
|
||||
$item['uid'] = $item['uri'] . "/$strdate/$chapters";
|
||||
if ($this->getInput('unique')) {
|
||||
$item['uid'] = $item['uri'] . "/$strdate/$chapters";
|
||||
} else {
|
||||
$item['uid'] = $item['uri'];
|
||||
}
|
||||
|
||||
|
||||
// Fetch workskin of desired chapter(s) in list
|
||||
if ($this->getInput('range') && ($limit == 0 || $count++ < $limit)) {
|
||||
$url = $item['uri'];
|
||||
switch ($this->getInput('range')) {
|
||||
case ('all'):
|
||||
$url .= '?view_full_work=true';
|
||||
break;
|
||||
case ('first'):
|
||||
break;
|
||||
case ('last'):
|
||||
// only way to get this is using the navigate page unfortunately
|
||||
$url .= '/navigate';
|
||||
$response = getContents($url, $headers);
|
||||
$html = \str_get_html($response);
|
||||
$html = defaultLinkTo($html, self::URI);
|
||||
$url = $html->find('ol.index.group > li > a', -1)->href;
|
||||
break;
|
||||
}
|
||||
$response = getContents($url, $headers);
|
||||
|
||||
$html = \str_get_html($response);
|
||||
$html = defaultLinkTo($html, self::URI);
|
||||
// remove duplicate fic summary
|
||||
if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) {
|
||||
$ficsum->remove();
|
||||
}
|
||||
$item['content'] .= $html->find('#workskin', 0);
|
||||
}
|
||||
|
||||
// Use predictability of download links to generate enclosures
|
||||
$wid = explode('/', $item['uri'])[4];
|
||||
foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) {
|
||||
$item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
@ -89,20 +174,31 @@ class AO3Bridge extends BridgeAbstract
|
||||
/**
|
||||
* Feed for recent chapters of a specific work.
|
||||
*/
|
||||
private function collectWork($id)
|
||||
private function collectWork($url)
|
||||
{
|
||||
$url = self::URI . "/works/$id/navigate";
|
||||
$response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']);
|
||||
$html = \str_get_html($response['body']);
|
||||
$version = 'v0.0.1';
|
||||
$headers = [
|
||||
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"
|
||||
];
|
||||
$response = getContents($url . '/navigate', $headers);
|
||||
|
||||
$html = \str_get_html($response);
|
||||
$html = defaultLinkTo($html, self::URI);
|
||||
|
||||
$response = getContents($url . '?view_full_work=true', $headers);
|
||||
|
||||
$workhtml = \str_get_html($response);
|
||||
$workhtml = defaultLinkTo($workhtml, self::URI);
|
||||
|
||||
$this->title = $html->find('h2 a', 0)->plaintext;
|
||||
|
||||
foreach ($html->find('ol.index.group > li') as $element) {
|
||||
$nav = $html->find('ol.index.group > li');
|
||||
for ($i = 0; $i < count($nav); $i++) {
|
||||
$item = [];
|
||||
|
||||
$element = $nav[$i];
|
||||
$item['title'] = $element->find('a', 0)->plaintext;
|
||||
$item['content'] = $element;
|
||||
$item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0);
|
||||
$item['uri'] = $element->find('a', 0)->href;
|
||||
|
||||
$strdate = $element->find('span.datetime', 0)->plaintext;
|
||||
@ -131,4 +227,24 @@ class AO3Bridge extends BridgeAbstract
|
||||
{
|
||||
return self::URI . '/favicon.ico';
|
||||
}
|
||||
|
||||
/**
 * Resolve the URL this feed is built from for the queried context.
 *
 * - Bookmarks: the user's bookmark listing, sorted by bookmarkable date
 * - List: the URL supplied through the `url` input, unchanged
 * - Work: the work page for the supplied `id`
 *
 * Any other context yields the parent's URI.
 *
 * @return string
 */
public function getURI()
{
    $fallback = parent::getURI();
    $context = $this->queriedContext;

    // Loose comparison on purpose: same matching rule as a switch statement.
    if ($context == 'Bookmarks') {
        $user = $this->getInput('user');
        return self::URI
            . '/users/' . $user
            . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
    }

    if ($context == 'List') {
        return $this->getInput('url');
    }

    if ($context == 'Work') {
        return self::URI . '/works/' . $this->getInput('id');
    }

    return $fallback;
}
|
||||
}
|
||||
|
@ -63,11 +63,13 @@ class ARDAudiothekBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$oldTz = date_default_timezone_get();
|
||||
$path = $this->getInput('path');
|
||||
$limit = $this->getInput('limit');
|
||||
|
||||
$oldTz = date_default_timezone_get();
|
||||
date_default_timezone_set('Europe/Berlin');
|
||||
|
||||
$pathComponents = explode('/', $this->getInput('path'));
|
||||
$pathComponents = explode('/', $path);
|
||||
if (empty($pathComponents)) {
|
||||
returnClientError('Path may not be empty');
|
||||
}
|
||||
@ -82,17 +84,21 @@ class ARDAudiothekBridge extends BridgeAbstract
|
||||
}
|
||||
|
||||
$url = self::APIENDPOINT . 'programsets/' . $showID . '/';
|
||||
$rawJSON = getContents($url);
|
||||
$processedJSON = json_decode($rawJSON)->data->programSet;
|
||||
$json1 = getContents($url);
|
||||
$data1 = Json::decode($json1, false);
|
||||
$processedJSON = $data1->data->programSet;
|
||||
if (!$processedJSON) {
|
||||
throw new \Exception('Unable to find show id: ' . $showID);
|
||||
}
|
||||
|
||||
$limit = $this->getInput('limit');
|
||||
$answerLength = 1;
|
||||
$offset = 0;
|
||||
$numberOfElements = 1;
|
||||
|
||||
while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) {
|
||||
$rawJSON = getContents($url . '?offset=' . $offset);
|
||||
$processedJSON = json_decode($rawJSON)->data->programSet;
|
||||
$json2 = getContents($url . '?offset=' . $offset);
|
||||
$data2 = Json::decode($json2, false);
|
||||
$processedJSON = $data2->data->programSet;
|
||||
|
||||
$answerLength = count($processedJSON->items->nodes);
|
||||
$offset = $offset + $answerLength;
|
||||
@ -113,7 +119,16 @@ class ARDAudiothekBridge extends BridgeAbstract
|
||||
$item['timestamp'] = $audio->publicationStartDateAndTime;
|
||||
$item['uid'] = $audio->id;
|
||||
$item['author'] = $audio->programSet->publicationService->title;
|
||||
$item['categories'] = [ $audio->programSet->editorialCategories->title ];
|
||||
|
||||
$category = $audio->programSet->editorialCategories->title ?? null;
|
||||
if ($category) {
|
||||
$item['categories'] = [$category];
|
||||
}
|
||||
|
||||
$item['itunes'] = [
|
||||
'duration' => $audio->duration,
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,11 @@ class ARDMediathekBridge extends BridgeAbstract
|
||||
* @const IMAGEWIDTHPLACEHOLDER
|
||||
*/
|
||||
const IMAGEWIDTHPLACEHOLDER = '{width}';
|
||||
/**
|
||||
* Title of the current show
|
||||
* @var string
|
||||
*/
|
||||
private $title;
|
||||
|
||||
const PARAMETERS = [
|
||||
[
|
||||
@ -72,7 +77,7 @@ class ARDMediathekBridge extends BridgeAbstract
|
||||
}
|
||||
}
|
||||
|
||||
$url = self::APIENDPOINT . $showID . '/?pageSize=' . self::PAGESIZE;
|
||||
$url = self::APIENDPOINT . $showID . '?pageSize=' . self::PAGESIZE;
|
||||
$rawJSON = getContents($url);
|
||||
$processedJSON = json_decode($rawJSON);
|
||||
|
||||
@ -93,6 +98,17 @@ class ARDMediathekBridge extends BridgeAbstract
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
$this->title = $processedJSON->title;
|
||||
|
||||
date_default_timezone_set($oldTz);
|
||||
}
|
||||
|
||||
/** {@inheritdoc} */
public function getName()
{
    // Use the stored title when one is set; otherwise defer to the parent's name.
    return empty($this->title) ? parent::getName() : $this->title;
}
|
||||
}
|
||||
|
@ -20,17 +20,14 @@ class AcrimedBridge extends FeedExpander
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(
|
||||
static::URI . 'spip.php?page=backend',
|
||||
$this->getInput('limit')
|
||||
);
|
||||
$url = 'https://www.acrimed.org/spip.php?page=backend';
|
||||
$limit = $this->getInput('limit');
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
$article = sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||
$article = defaultLinkTo($article, static::URI);
|
||||
$item['content'] = $article;
|
||||
|
45
bridges/ActivisionResearchBridge.php
Normal file
45
bridges/ActivisionResearchBridge.php
Normal file
@ -0,0 +1,45 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge for the Activision Research blog.
 *
 * Reads the blog feed block on the site's front page and emits one item per
 * blog entry, with the entry's full body fetched from its own page.
 */
class ActivisionResearchBridge extends BridgeAbstract
{
    const NAME = 'Activision Research Blog';
    const URI = 'https://research.activision.com';
    const DESCRIPTION = 'Posts from the Activision Research blog';
    const MAINTAINER = 'thefranke';
    const CACHE_TIMEOUT = 86400; // 24h

    /**
     * Collect all blog entries listed in the front page's `home-blog-feed` block.
     *
     * @throws \Exception when the feed block cannot be found on the front page
     */
    public function collectData()
    {
        // Kept in a variable so the error message below can report which page was fetched.
        $url = static::URI;

        $dom = getSimpleHTMLDOM($url);
        $dom = $dom->find('div[id="home-blog-feed"]', 0);
        if (!$dom) {
            throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
        }
        $dom = defaultLinkTo($dom, $this->getURI());

        foreach ($dom->find('div[class="blog-entry"]') as $article) {
            $a = $article->find('a', 0);

            // The teaser image is only available as a CSS background: take the text inside url(...).
            $blogimg = extractFromDelimiters($article->find('div[class="blog-img"]', 0)->style, 'url(', ')');

            $title = htmlspecialchars_decode($article->find('div[class="title"]', 0)->plaintext);
            $author = htmlspecialchars_decode($article->find('div[class="author"]', 0)->plaintext);
            $date = $article->find('div[class="pubdate"]', 0)->plaintext;

            // Second argument is CACHE_TIMEOUT * 7 * 4, i.e. 28 days in seconds
            // (presumably the cache lifetime of the entry page - confirm against getSimpleHTMLDOMCached).
            $entry = getSimpleHTMLDOMCached($a->href, static::CACHE_TIMEOUT * 7 * 4);
            $entry = defaultLinkTo($entry, $this->getURI());

            $content = $entry->find('div[class="blog-body"]', 0);
            $tagsremove = ['script', 'iframe', 'input', 'form'];
            $content = sanitize($content, $tagsremove);

            // Prepend the teaser image, resolved against the site root.
            $content = '<img src="' . static::URI . $blogimg . '" alt="">' . $content;

            $this->items[] = [
                'title' => $title,
                'author' => $author,
                'uri' => $a->href,
                'content' => $content,
                'timestamp' => strtotime($date),
            ];
        }
    }
}
|
@ -32,8 +32,7 @@ class AirBreizhBridge extends BridgeAbstract
|
||||
public function collectData()
|
||||
{
|
||||
$html = '';
|
||||
$html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme'))
|
||||
or returnClientError('No results for this query.');
|
||||
$html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme'));
|
||||
|
||||
foreach ($html->find('article') as $article) {
|
||||
$item = [];
|
||||
|
@ -13,26 +13,42 @@ class AllegroBridge extends BridgeAbstract
|
||||
'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660',
|
||||
'required' => true,
|
||||
],
|
||||
'sessioncookie' => [
|
||||
'name' => 'The \'wdctx\' session cookie',
|
||||
'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits',
|
||||
'pattern' => '^.{250,};?$',
|
||||
// phpcs:ignore
|
||||
'exampleValue' => 'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd',
|
||||
'cookie' => [
|
||||
'name' => 'The complete cookie value',
|
||||
'title' => 'Paste the value of the cookie value from your browser if you want to prevent Allegro imposing rate limits',
|
||||
'required' => false,
|
||||
],
|
||||
'includeSponsoredOffers' => [
|
||||
'type' => 'checkbox',
|
||||
'name' => 'Include Sponsored Offers'
|
||||
'name' => 'Include Sponsored Offers',
|
||||
'defaultValue' => 'checked'
|
||||
],
|
||||
'includePromotedOffers' => [
|
||||
'type' => 'checkbox',
|
||||
'name' => 'Include Promoted Offers',
|
||||
'defaultValue' => 'checked'
|
||||
]
|
||||
]];
|
||||
|
||||
public function getName()
|
||||
{
|
||||
parse_str(parse_url($this->getInput('url'), PHP_URL_QUERY), $fields);
|
||||
$url = $this->getInput('url');
|
||||
if (!$url) {
|
||||
return parent::getName();
|
||||
}
|
||||
$parsedUrl = parse_url($url, PHP_URL_QUERY);
|
||||
if (!$parsedUrl) {
|
||||
return parent::getName();
|
||||
}
|
||||
parse_str($parsedUrl, $fields);
|
||||
|
||||
if ($query = array_key_exists('string', $fields) ? urldecode($fields['string']) : false) {
|
||||
return $query;
|
||||
if (array_key_exists('string', $fields)) {
|
||||
$f = urldecode($fields['string']);
|
||||
} else {
|
||||
$f = false;
|
||||
}
|
||||
if ($f) {
|
||||
return $f;
|
||||
}
|
||||
|
||||
return parent::getName();
|
||||
@ -51,9 +67,9 @@ class AllegroBridge extends BridgeAbstract
|
||||
|
||||
$opts = [];
|
||||
|
||||
// If a session cookie is provided
|
||||
if ($sessioncookie = $this->getInput('sessioncookie')) {
|
||||
$opts[CURLOPT_COOKIE] = 'wdctx=' . $sessioncookie;
|
||||
// If a cookie is provided
|
||||
if ($cookie = $this->getInput('cookie')) {
|
||||
$opts[CURLOPT_COOKIE] = $cookie;
|
||||
}
|
||||
|
||||
$html = getSimpleHTMLDOM($url, [], $opts);
|
||||
@ -63,58 +79,57 @@ class AllegroBridge extends BridgeAbstract
|
||||
return;
|
||||
}
|
||||
|
||||
$results = $html->find('._6a66d_V7Lel article');
|
||||
$results = $html->find('article[data-analytics-view-custom-context="REGULAR"]');
|
||||
|
||||
if (!$this->getInput('includeSponsoredOffers')) {
|
||||
$results = array_filter($results, function ($node) {
|
||||
return $node->{'data-analytics-view-label'} != 'showSponsoredItems';
|
||||
});
|
||||
if ($this->getInput('includeSponsoredOffers')) {
|
||||
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]'));
|
||||
}
|
||||
|
||||
if ($this->getInput('includePromotedOffers')) {
|
||||
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]'));
|
||||
}
|
||||
|
||||
foreach ($results as $post) {
|
||||
$item = [];
|
||||
|
||||
$item['uri'] = $post->find('._6a66d_LX75-', 0)->href;
|
||||
|
||||
//TODO: port this over, whatever it does, from https://github.com/MK-PL/AllegroRSS
|
||||
// if (arrayLinks.includes('events/clicks?')) {
|
||||
// let sponsoredLink = new URL(arrayLinks).searchParams.get('redirect')
|
||||
// arrayLinks = sponsoredLink.slice(0, sponsoredLink.indexOf('?'))
|
||||
// }
|
||||
|
||||
$item['title'] = $post->find('._6a66d_LX75-', 0)->innertext;
|
||||
|
||||
$item['uid'] = $post->{'data-analytics-view-value'};
|
||||
|
||||
$item_link = $post->find('a[href*="' . $item['uid'] . '"], a[href*="allegrolokalnie"]', 0);
|
||||
|
||||
$item['uri'] = $item_link->href;
|
||||
|
||||
$item['title'] = $item_link->find('img', 0)->alt;
|
||||
|
||||
$image = $item_link->find('img', 0)->{'data-src'} ?: $item_link->find('img', 0)->src ?? false;
|
||||
|
||||
if ($image) {
|
||||
$item['enclosures'] = [$image . '#.image'];
|
||||
}
|
||||
|
||||
$price = $post->{'data-analytics-view-json-custom-price'};
|
||||
if ($price) {
|
||||
$priceDecoded = json_decode(html_entity_decode($price));
|
||||
$price = $priceDecoded->amount . ' ' . $priceDecoded->currency;
|
||||
}
|
||||
|
||||
$descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/'];
|
||||
$descriptionReplacements = ['<span>', ':</span> ', '<strong>', ' </strong> '];
|
||||
$description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext;
|
||||
$descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description);
|
||||
|
||||
$buyNowAuction = $post->find('.mqu1_g3.mvrt_0.mgn2_12', 0)->innertext ?? '';
|
||||
$buyNowAuction = str_replace('</span><span', '</span> <span', $buyNowAuction);
|
||||
|
||||
$auctionTimeLeft = $post->find('._6a66d_ImOzU', 0)->innertext ?? '';
|
||||
|
||||
$price = $post->find('._6a66d_6R3iN', 0)->plaintext;
|
||||
$price = empty($auctionTimeLeft) ? $price : $price . '- kwota licytacji';
|
||||
|
||||
$image = $post->find('._6a66d_44ioA img', 0)->{'data-src'} ?: $post->find('._6a66d_44ioA img', 0)->src ?? false;
|
||||
if ($image) {
|
||||
$item['enclosures'] = [$image . '#.image'];
|
||||
}
|
||||
|
||||
$offerExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) {
|
||||
$pricingExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) {
|
||||
return empty($node->find('.mvrt_0'));
|
||||
});
|
||||
|
||||
$offerExtraInfo = $offerExtraInfo[0]->plaintext ?? '';
|
||||
$pricingExtraInfo = $pricingExtraInfo[0]->plaintext ?? '';
|
||||
|
||||
$isSmart = $post->find('._6a66d_TC2Zk', 0)->innertext ?? '';
|
||||
if (str_contains($isSmart, 'z kurierem')) {
|
||||
$offerExtraInfo .= ', Smart z kurierem';
|
||||
} else {
|
||||
$offerExtraInfo .= ', Smart';
|
||||
$offerExtraInfo = array_map(function ($node) {
|
||||
return str_contains($node->plaintext, 'zapłać później') ? '' : $node->outertext;
|
||||
}, $post->find('div.mpof_ki.mwdn_1.mj7a_4.mgn2_12'));
|
||||
|
||||
$isSmart = $post->find('img[alt="Smart!"]', 0) ?? false;
|
||||
if ($isSmart) {
|
||||
$pricingExtraInfo .= $isSmart->outertext;
|
||||
}
|
||||
|
||||
$item['categories'] = [];
|
||||
@ -131,11 +146,9 @@ class AllegroBridge extends BridgeAbstract
|
||||
. '<div><strong>'
|
||||
. $price
|
||||
. '</strong></div><div>'
|
||||
. $auctionTimeLeft
|
||||
. '</div><div>'
|
||||
. $buyNowAuction
|
||||
. implode('</div><div>', $offerExtraInfo)
|
||||
. '</div><dl>'
|
||||
. $offerExtraInfo
|
||||
. $pricingExtraInfo
|
||||
. '</dl><hr>';
|
||||
|
||||
$this->items[] = $item;
|
||||
|
@ -24,6 +24,7 @@ class AllocineFRSortiesBridge extends BridgeAbstract
|
||||
$thumb = $element->find('figure.thumbnail', 0);
|
||||
$meta = $element->find('div.meta-body', 0);
|
||||
$synopsis = $element->find('div.synopsis', 0);
|
||||
$date = $element->find('span.date', 0);
|
||||
|
||||
$title = $element->find('a[class*=meta-title-link]', 0);
|
||||
$content = trim(defaultLinkTo($thumb->outertext . $meta->outertext . $synopsis->outertext, static::URI));
|
||||
@ -34,8 +35,32 @@ class AllocineFRSortiesBridge extends BridgeAbstract
|
||||
|
||||
$item['content'] = $content;
|
||||
$item['title'] = trim($title->innertext);
|
||||
$item['timestamp'] = $this->frenchPubDateToTimestamp($date->plaintext);
|
||||
$item['uri'] = static::BASE_URI . '/' . substr($title->href, 1);
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convert a date written with a French month name into a Unix timestamp.
 *
 * The text is lower-cased, each French month name is replaced by an English
 * one, and the result is handed to strtotime().
 *
 * @param string $date Date text containing a French month name
 * @return int|false Timestamp, or false when strtotime() cannot parse the translated text
 */
private function frenchPubDateToTimestamp($date)
{
    // Month names such as "février", "août" and "décembre" contain multi-byte UTF-8
    // letters. strtolower() works on single bytes, so it cannot lower-case "É"/"Û" and,
    // on locale-dependent PHP versions, may alter bytes inside those characters.
    // mb_strtolower() lower-cases the text as UTF-8.
    $normalized = mb_strtolower($date, 'UTF-8');

    $months = [
        'janvier' => 'jan',
        'février' => 'feb',
        'mars' => 'march',
        'avril' => 'apr',
        'mai' => 'may',
        'juin' => 'jun',
        'juillet' => 'jul',
        'août' => 'aug',
        'septembre' => 'sep',
        'octobre' => 'oct',
        'novembre' => 'nov',
        'décembre' => 'dec'
    ];

    return strtotime(strtr($normalized, $months));
}
|
||||
}
|
||||
|
@ -125,14 +125,13 @@ class AmazonPriceTrackerBridge extends BridgeAbstract
|
||||
*/
|
||||
private function getImage($html)
|
||||
{
|
||||
$image = 'https://placekitten.com/200/300';
|
||||
$imageSrc = $html->find('#main-image-container img', 0);
|
||||
|
||||
if ($imageSrc) {
|
||||
$hiresImage = $imageSrc->getAttribute('data-old-hires');
|
||||
$dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image');
|
||||
$image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute);
|
||||
}
|
||||
$image = $image ?: 'https://placekitten.com/200/300';
|
||||
|
||||
return <<<EOT
|
||||
<img width="300" style="max-width:300;max-height:300" src="$image" alt="{$this->title}" />
|
||||
@ -147,7 +146,7 @@ EOT;
|
||||
{
|
||||
$uri = $this->getURI();
|
||||
|
||||
return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.');
|
||||
return getSimpleHTMLDOM($uri);
|
||||
}
|
||||
|
||||
private function scrapePriceFromMetrics($html)
|
||||
|
278
bridges/AnfrBridge.php
Normal file
278
bridges/AnfrBridge.php
Normal file
@ -0,0 +1,278 @@
|
||||
<?php
|
||||
|
||||
class AnfrBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'ANFR';
|
||||
const URI = 'https://data.anfr.fr/';
|
||||
const DESCRIPTION = 'Fetches data from the French administration "Agence Nationale des Fréquences".';
|
||||
const CACHE_TIMEOUT = 604800; // 7d
|
||||
const MAINTAINER = 'quent1';
|
||||
const PARAMETERS = [
|
||||
'Données sur les réseaux mobiles' => [
|
||||
'departement' => [
|
||||
'name' => 'Département',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Tous' => null,
|
||||
'Ain' => '001',
|
||||
'Aisne' => '002',
|
||||
'Allier' => '003',
|
||||
'Alpes-de-Haute-Provence' => '004',
|
||||
'Hautes-Alpes' => '005',
|
||||
'Alpes-Maritimes' => '006',
|
||||
'Ardèche' => '007',
|
||||
'Ardennes' => '008',
|
||||
'Ariège' => '009',
|
||||
'Aube' => '010',
|
||||
'Aude' => '011',
|
||||
'Aveyron' => '012',
|
||||
'Bouches-du-Rhône' => '013',
|
||||
'Calvados' => '014',
|
||||
'Cantal' => '015',
|
||||
'Charente' => '016',
|
||||
'Charente-Maritime' => '017',
|
||||
'Cher' => '018',
|
||||
'Corrèze' => '019',
|
||||
'Corse-du-Sud' => '02A',
|
||||
'Haute-Corse' => '02B',
|
||||
'Côte-d\'Or' => '021',
|
||||
'Côtes-d\'Armor' => '022',
|
||||
'Creuse' => '023',
|
||||
'Dordogne' => '024',
|
||||
'Doubs' => '025',
|
||||
'Drôme' => '026',
|
||||
'Eure' => '027',
|
||||
'Eure-et-Loir' => '028',
|
||||
'Finistère' => '029',
|
||||
'Gard' => '030',
|
||||
'Haute-Garonne' => '031',
|
||||
'Gers' => '032',
|
||||
'Gironde' => '033',
|
||||
'Hérault' => '034',
|
||||
'Ille-et-Vilaine' => '035',
|
||||
'Indre' => '036',
|
||||
'Indre-et-Loire' => '037',
|
||||
'Isère' => '038',
|
||||
'Jura' => '039',
|
||||
'Landes' => '040',
|
||||
'Loir-et-Cher' => '041',
|
||||
'Loire' => '042',
|
||||
'Haute-Loire' => '043',
|
||||
'Loire-Atlantique' => '044',
|
||||
'Loiret' => '045',
|
||||
'Lot' => '046',
|
||||
'Lot-et-Garonne' => '047',
|
||||
'Lozère' => '048',
|
||||
'Maine-et-Loire' => '049',
|
||||
'Manche' => '050',
|
||||
'Marne' => '051',
|
||||
'Haute-Marne' => '052',
|
||||
'Mayenne' => '053',
|
||||
'Meurthe-et-Moselle' => '054',
|
||||
'Meuse' => '055',
|
||||
'Morbihan' => '056',
|
||||
'Moselle' => '057',
|
||||
'Nièvre' => '058',
|
||||
'Nord' => '059',
|
||||
'Oise' => '060',
|
||||
'Orne' => '061',
|
||||
'Pas-de-Calais' => '062',
|
||||
'Puy-de-Dôme' => '063',
|
||||
'Pyrénées-Atlantiques' => '064',
|
||||
'Hautes-Pyrénées' => '065',
|
||||
'Pyrénées-Orientales' => '066',
|
||||
'Bas-Rhin' => '067',
|
||||
'Haut-Rhin' => '068',
|
||||
'Rhône' => '069',
|
||||
'Haute-Saône' => '070',
|
||||
'Saône-et-Loire' => '071',
|
||||
'Sarthe' => '072',
|
||||
'Savoie' => '073',
|
||||
'Haute-Savoie' => '074',
|
||||
'Paris' => '075',
|
||||
'Seine-Maritime' => '076',
|
||||
'Seine-et-Marne' => '077',
|
||||
'Yvelines' => '078',
|
||||
'Deux-Sèvres' => '079',
|
||||
'Somme' => '080',
|
||||
'Tarn' => '081',
|
||||
'Tarn-et-Garonne' => '082',
|
||||
'Var' => '083',
|
||||
'Vaucluse' => '084',
|
||||
'Vendée' => '085',
|
||||
'Vienne' => '086',
|
||||
'Haute-Vienne' => '087',
|
||||
'Vosges' => '088',
|
||||
'Yonne' => '089',
|
||||
'Territoire de Belfort' => '090',
|
||||
'Essonne' => '091',
|
||||
'Hauts-de-Seine' => '092',
|
||||
'Seine-Saint-Denis' => '093',
|
||||
'Val-de-Marne' => '094',
|
||||
'Val-d\'Oise' => '095',
|
||||
'Guadeloupe' => '971',
|
||||
'Martinique' => '972',
|
||||
'Guyane' => '973',
|
||||
'La Réunion' => '974',
|
||||
'Saint-Pierre-et-Miquelon' => '975',
|
||||
'Mayotte' => '976',
|
||||
'Saint-Barthélemy' => '977',
|
||||
'Saint-Martin' => '978',
|
||||
'Terres australes et antarctiques françaises' => '984',
|
||||
'Wallis-et-Futuna' => '986',
|
||||
'Polynésie française' => '987',
|
||||
'Nouvelle-Calédonie' => '988',
|
||||
'Île de Clipperton' => '989'
|
||||
]
|
||||
],
|
||||
'generation' => [
|
||||
'name' => 'Génération',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Tous' => null,
|
||||
'2G' => '2G',
|
||||
'3G' => '3G',
|
||||
'4G' => '4G',
|
||||
'5G' => '5G',
|
||||
]
|
||||
],
|
||||
'operateur' => [
|
||||
'name' => 'Opérateur',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Tous' => null,
|
||||
'Bouygues Télécom' => 'BOUYGUES TELECOM',
|
||||
'Dauphin Télécom' => 'DAUPHIN TELECOM',
|
||||
'Digiciel' => 'DIGICEL',
|
||||
'Free Caraïbes' => 'FREE CARAIBES',
|
||||
'Free Mobile' => 'FREE MOBILE',
|
||||
'GLOBALTEL' => 'GLOBALTEL',
|
||||
'Office des postes et télécommunications de Nouvelle Calédonie' => 'Gouv Nelle Calédonie (OPT)',
|
||||
'Maore Mobile' => 'MAORE MOBILE',
|
||||
'ONATi' => 'ONATI',
|
||||
'Orange' => 'ORANGE',
|
||||
'Outremer Telecom' => 'OUTREMER TELECOM',
|
||||
'Vodafone polynésie' => 'PMT/VODAPHONE',
|
||||
'SFR' => 'SFR',
|
||||
'SPM Télécom' => 'SPM TELECOM',
|
||||
'Service des Postes et Télécommunications de Polynésie Française' => 'Gouv Nelle Calédonie (OPT)',
|
||||
'SRR' => 'SRR',
|
||||
'Station étrangère' => 'Station étrangère',
|
||||
'Telco OI' => 'TELCO IO',
|
||||
'United Telecommunication Services Caraïbes' => 'UTS Caraibes',
|
||||
'Ora Mobile' => 'VITI SAS',
|
||||
'Zeop' => 'ZEOP'
|
||||
]
|
||||
],
|
||||
'statut' => [
|
||||
'name' => 'Statut',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Tous' => null,
|
||||
'En service' => 'En service',
|
||||
'Projet approuvé' => 'Projet approuvé',
|
||||
'Techniquement opérationnel' => 'Techniquement opérationnel',
|
||||
]
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Download the station records, group them by station number and emit one
 * item per station, most recently updated station first.
 *
 * Each item lists every frequency record of the station with its status; the
 * item timestamp is the latest `date_maj` among the station's records.
 */
public function collectData()
{
    $query = [
        'id' => 'observatoire_2g_3g_4g',
        'resource_id' => '88ef0887-6b0f-4d3f-8545-6d64c8f597da',
        'fields' => 'id,adm_lb_nom,sta_nm_dpt,emr_lb_systeme,generation,date_maj,sta_nm_anfr,adr_lb_lieu,adr_lb_add1,adr_lb_add2,adr_lb_add3,adr_nm_cp,statut',
        'rows' => 10000
    ];

    // Input name => [query parameter, whether the value is urlencode()d before being added].
    $filters = [
        'departement' => ['refine.sta_nm_dpt', true],
        'generation' => ['refine.generation', false],
        'operateur' => ['refine.adm_lb_nom', true],
        'statut' => ['refine.statut', true],
    ];

    foreach ($filters as $inputName => [$parameter, $preEncode]) {
        $value = $this->getInput($inputName);
        if (empty($value)) {
            continue;
        }
        $query[$parameter] = $preEncode ? urlencode($value) : $value;
    }

    // http_build_query() encodes everything (pre-encoded values become double-encoded) and
    // urldecode() then removes one layer. Net result: pre-encoded filter values stay encoded
    // once, while keys and all other values (e.g. the comma-separated field list) are sent raw.
    $url = urljoin(static::URI, '/d4c/api/records/1.0/download/?' . urldecode(http_build_query($query)));

    $json = getContents($url);
    $data = Json::decode($json, false);

    $stations = [];
    foreach ($data->records as $record) {
        $fields = $record->fields;
        $stationId = $fields->sta_nm_anfr;

        // First record seen for this station: create its entry with the address details.
        if (!isset($stations[$stationId])) {
            $street = sprintf(
                '%s %s %s',
                $fields->adr_lb_add1 ?? '',
                $fields->adr_lb_add2 ?? '',
                $fields->adr_lb_add3 ?? ''
            );
            $stations[$stationId] = [
                'id' => $stationId,
                'operator' => $fields->adm_lb_nom,
                'frequencies' => [],
                'lastUpdate' => 0,
                'address' => [
                    'street' => trim($street),
                    'postCode' => $fields->adr_nm_cp,
                    'city' => $fields->adr_lb_lieu
                ]
            ];
        }

        $updatedAt = strtotime($fields->date_maj);

        $stations[$stationId]['frequencies'][] = [
            'generation' => $fields->generation,
            'frequency' => $fields->emr_lb_systeme,
            'status' => $fields->statut,
            'updatedAt' => $updatedAt,
        ];

        // Keep the most recent update time seen for the station.
        $stations[$stationId]['lastUpdate'] = max($stations[$stationId]['lastUpdate'], $updatedAt);
    }

    // Newest first. usort() also re-indexes the array, dropping the station-number keys.
    usort($stations, static function ($a, $b) {
        return $b['lastUpdate'] <=> $a['lastUpdate'];
    });

    foreach ($stations as $station) {
        $address = $station['address'];

        $title = sprintf(
            '[%s] Mise à jour de la station n°%s à %s (%s)',
            $station['operator'],
            $station['id'],
            $address['city'],
            $address['postCode']
        );

        // One <li> per frequency record, in the order the records were received.
        $frequencyList = '';
        foreach ($station['frequencies'] as $frequency) {
            $frequencyList = sprintf(
                '%s<li>%s : %s</li>',
                $frequencyList,
                $frequency['frequency'],
                $frequency['status']
            );
        }

        $content = sprintf(
            '<h1>Adresse complète</h1><p>%s<br>%s<br>%s</p><h1>Fréquences</h1><p><ul>%s</ul></p>',
            $address['street'],
            $address['postCode'],
            $address['city'],
            $frequencyList
        );

        $this->items[] = [
            'uid' => $station['id'],
            'timestamp' => $station['lastUpdate'],
            'title' => $title,
            'content' => $content,
        ];
    }
}
|
||||
}
|
87
bridges/AnisearchBridge.php
Normal file
87
bridges/AnisearchBridge.php
Normal file
@ -0,0 +1,87 @@
|
||||
<?php
|
||||
|
||||
class AnisearchBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'Tone866';
|
||||
const NAME = 'Anisearch';
|
||||
const URI = 'https://www.anisearch.de';
|
||||
const CACHE_TIMEOUT = 1800; // 30min
|
||||
const DESCRIPTION = 'Feed for Anisearch';
|
||||
// Bridge inputs: which dub listing to scrape and whether to embed trailers.
const PARAMETERS = [[
    'category' => [
        'name' => 'Dub',
        'type' => 'list',
        'values' => [
            'DE'
                => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=de&sort=date&order=desc&view=4',
            'EN'
                => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=en&sort=date&order=desc&view=4',
            'JP'
                => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=ja&sort=date&order=desc&view=4'
        ]
    ],
    'trailers' => [
        'name' => 'Trailers',
        'type' => 'checkbox',
        'title' => 'Will include trailers',
        'defaultValue' => false
    ]
]];
|
||||
|
||||
/**
 * Build one feed item per entry of the selected anisearch listing.
 *
 * For every list entry the detail page is fetched to obtain the description
 * and the cover image. When the "trailers" input is set, the first trailer
 * page is fetched as well and its video is appended as an iframe.
 */
public function collectData()
{
    $baseurl = 'https://www.anisearch.de/';
    $trailers = $this->getInput('trailers');
    // Fewer entries are processed when trailers are requested
    // (presumably because each trailer costs an additional request).
    $limit = $trailers ? 5 : 10;

    $dom = getSimpleHTMLDOM($this->getInput('category'));

    foreach ($dom->find('li.btype0') as $key => $li) {
        if ($key >= $limit) {
            break;
        }

        $a = $li->find('a', 0);
        $title = $a ? $a->find('span.title', 0) : null;
        if (!$a || !$title) {
            // Without a link and a title there is nothing to build an item from.
            continue;
        }
        $url = $baseurl . $a->href;

        // Fetch the article (detail page).
        $domarticle = getSimpleHTMLDOM($url);
        $content = $domarticle->find('div.details-text', 0);

        // Cover image; it is emitted as-is (null simply concatenates as an empty string).
        $headerimage = $domarticle->find('img#details-cover', 0);

        if ($content) {
            foreach ($content->find('.hidden') as $element) {
                $element->remove();
            }
        }

        // Optional trailer embed.
        $ytlink = '';
        if ($trailers) {
            $trailerlink = $domarticle->find('section#trailers > div > div.swiper > ul.swiper-wrapper > li.swiper-slide > a', 0);
            if (isset($trailerlink)) {
                $trailersite = getSimpleHTMLDOM($baseurl . $trailerlink->href);
                $iframe = $trailersite->find('div#video > iframe', 0);
                // The video URL is read from the iframe's data-xsrc attribute.
                $trailer = $iframe ? $iframe->{'data-xsrc'} : null;
                if ($trailer) {
                    $ytlink = implode("\n", [
                        '<br /><iframe width="560" height="315" src="' . $trailer . '" title="YouTube video player"',
                        'frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"',
                        'referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>',
                    ]);
                }
            }
        }

        $this->items[] = [
            'title' => $title->plaintext,
            'uri' => $url,
            'content' => $headerimage . '<br />' . $content . $ytlink
        ];
    }
}
|
||||
}
|
183
bridges/AnnasArchiveBridge.php
Normal file
183
bridges/AnnasArchiveBridge.php
Normal file
@ -0,0 +1,183 @@
|
||||
<?php
|
||||
|
||||
class AnnasArchiveBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Anna\'s Archive';
|
||||
const MAINTAINER = 'phantop';
|
||||
const URI = 'https://annas-archive.org/';
|
||||
const DESCRIPTION = 'Returns books from Anna\'s Archive';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'q' => [
|
||||
'name' => 'Query',
|
||||
'exampleValue' => 'apothecary diaries',
|
||||
'required' => true,
|
||||
],
|
||||
'ext' => [
|
||||
'name' => 'Extension',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Any' => null,
|
||||
'azw3' => 'azw3',
|
||||
'cbr' => 'cbr',
|
||||
'cbz' => 'cbz',
|
||||
'djvu' => 'djvu',
|
||||
'epub' => 'epub',
|
||||
'fb2' => 'fb2',
|
||||
'fb2.zip' => 'fb2.zip',
|
||||
'mobi' => 'mobi',
|
||||
'pdf' => 'pdf',
|
||||
]
|
||||
],
|
||||
'lang' => [
|
||||
'name' => 'Language',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Any' => null,
|
||||
'Afrikaans [af]' => 'af',
|
||||
'Arabic [ar]' => 'ar',
|
||||
'Bangla [bn]' => 'bn',
|
||||
'Belarusian [be]' => 'be',
|
||||
'Bulgarian [bg]' => 'bg',
|
||||
'Catalan [ca]' => 'ca',
|
||||
'Chinese [zh]' => 'zh',
|
||||
'Church Slavic [cu]' => 'cu',
|
||||
'Croatian [hr]' => 'hr',
|
||||
'Czech [cs]' => 'cs',
|
||||
'Danish [da]' => 'da',
|
||||
'Dongxiang [sce]' => 'sce',
|
||||
'Dutch [nl]' => 'nl',
|
||||
'English [en]' => 'en',
|
||||
'French [fr]' => 'fr',
|
||||
'German [de]' => 'de',
|
||||
'Greek [el]' => 'el',
|
||||
'Hebrew [he]' => 'he',
|
||||
'Hindi [hi]' => 'hi',
|
||||
'Hungarian [hu]' => 'hu',
|
||||
'Indonesian [id]' => 'id',
|
||||
'Irish [ga]' => 'ga',
|
||||
'Italian [it]' => 'it',
|
||||
'Japanese [ja]' => 'ja',
|
||||
'Kazakh [kk]' => 'kk',
|
||||
'Korean [ko]' => 'ko',
|
||||
'Latin [la]' => 'la',
|
||||
'Latvian [lv]' => 'lv',
|
||||
'Lithuanian [lt]' => 'lt',
|
||||
'Luxembourgish [lb]' => 'lb',
|
||||
'Ndolo [ndl]' => 'ndl',
|
||||
'Norwegian [no]' => 'no',
|
||||
'Persian [fa]' => 'fa',
|
||||
'Polish [pl]' => 'pl',
|
||||
'Portuguese [pt]' => 'pt',
|
||||
'Romanian [ro]' => 'ro',
|
||||
'Russian [ru]' => 'ru',
|
||||
'Serbian [sr]' => 'sr',
|
||||
'Spanish [es]' => 'es',
|
||||
'Swedish [sv]' => 'sv',
|
||||
'Tamil [ta]' => 'ta',
|
||||
'Traditional Chinese [zh‑Hant]' => 'zh‑Hant',
|
||||
'Turkish [tr]' => 'tr',
|
||||
'Ukrainian [uk]' => 'uk',
|
||||
'Unknown language' => '_empty',
|
||||
'Unknown language [und]' => 'und',
|
||||
'Unknown language [urdu]' => 'urdu',
|
||||
'Urdu [ur]' => 'ur',
|
||||
'Vietnamese [vi]' => 'vi',
|
||||
'Welsh [cy]' => 'cy',
|
||||
]
|
||||
],
|
||||
'content' => [
|
||||
'name' => 'Type',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Any' => null,
|
||||
'Book (fiction)' => 'book_fiction',
|
||||
'Book (non‑fiction)' => 'book_nonfiction',
|
||||
'Book (unknown)' => 'book_unknown',
|
||||
'Comic book' => 'book_comic',
|
||||
'Journal article' => 'journal_article',
|
||||
'Magazine' => 'magazine',
|
||||
'Standards document' => 'standards_document',
|
||||
]
|
||||
],
|
||||
'src' => [
|
||||
'name' => 'Source',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Any' => null,
|
||||
'Internet Archive' => 'ia',
|
||||
'Libgen.li' => 'lgli',
|
||||
'Libgen.rs' => 'lgrs',
|
||||
'Sci‑Hub' => 'scihub',
|
||||
'Z‑Library' => 'zlib',
|
||||
]
|
||||
],
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Run the configured search and turn every exact-match result into a feed item.
 *
 * Each result's detail page is fetched (cached for 20 days) to add the cover,
 * the summary and the download links as enclosures.
 */
public function collectData()
{
    $url = $this->getURI();
    $list = getSimpleHTMLDOMCached($url);
    $list = defaultLinkTo($list, self::URI);

    // Don't attempt to do anything if not found message is given
    if ($list->find('.js-not-found-additional')) {
        return;
    }

    foreach ($list->find('.w-full > .mb-4 > div') as $element) {
        // Stop adding entries once the partial match list starts
        if (str_contains($element->innertext, 'partial match')) {
            break;
        }

        $link = $element->find('a', 0);
        if (!$link) {
            continue;
        }

        $titleNode = $link->find('h3', 0);
        $authorNode = $link->find('div.italic', 0);

        $item = [];
        $item['title'] = $titleNode ? $titleNode->plaintext : '';
        $item['author'] = $authorNode ? $authorNode->plaintext : '';
        $item['uri'] = $link->href;
        $item['content'] = $link->plaintext;
        $item['uid'] = $item['uri'];
        // Always defined, so the array_diff() below never receives null
        // when a detail page exposes no matching download link.
        $item['enclosures'] = [];

        $item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20);
        if ($item_html) {
            $item_html = defaultLinkTo($item_html, self::URI);
            $item['content'] .= $item_html->find('main img', 0);
            $item['content'] .= $item_html->find('main .mt-4', 0); // Summary
            foreach ($item_html->find('main ul.mb-4 > li > a.js-download-link') as $file) {
                if (!str_contains($file->href, 'fast_download')) {
                    $item['enclosures'][] = $file->href;
                }
            }
            // Remove bulk torrents from enclosures list; array_values() closes
            // the key gaps array_diff() leaves behind.
            $item['enclosures'] = array_values(
                array_diff($item['enclosures'], [self::URI . 'datasets'])
            );
        }

        $this->items[] = $item;
    }
}
|
||||
|
||||
/**
 * Feed name: the bridge name, suffixed with the search query when one is set.
 */
public function getName()
{
    $baseName = parent::getName();
    $query = $this->getInput('q');

    if ($query == null) {
        return $baseName;
    }

    return $baseName . ' - ' . $query;
}
|
||||
|
||||
/**
 * Search URL for the current inputs, newest results first.
 *
 * Inputs that are empty (falsy) are left out of the query string.
 */
public function getURI()
{
    $query = [];
    foreach (['q', 'ext', 'lang', 'src', 'content'] as $name) {
        $value = $this->getInput($name);
        if ($value) {
            $query[$name] = $value;
        }
    }

    return parent::getURI() . 'search?sort=newest&' . http_build_query($query);
}
|
||||
}
|
@ -18,9 +18,45 @@ class AppleMusicBridge extends BridgeAbstract
|
||||
'required' => true,
|
||||
],
|
||||
]];
|
||||
const CACHE_TIMEOUT = 21600; // 6 hours
|
||||
const CACHE_TIMEOUT = 60 * 60 * 6; // 6 hours
|
||||
|
||||
private $title;
|
||||
|
||||
/**
 * Turn every "collection" entry of the lookup result into a feed item and
 * remember the artist name as the feed title.
 */
public function collectData()
{
    $items = $this->getJson();
    $artist = $this->getArtist($items);

    $this->title = $artist->artistName;

    foreach ($items as $item) {
        if ($item->wrapperType !== 'collection') {
            continue;
        }

        // Larger artwork variants are derived from the 100x100 URL.
        $artworkUrl500 = str_replace('/100x100', '/500x500', $item->artworkUrl100);
        $artworkUrl2000 = str_replace('/100x100', '/2000x2000', $item->artworkUrl100);

        // Everything interpolated into the markup below is HTML-escaped.
        $srcset = htmlspecialchars(sprintf(
            '%s 60w, %s 100w, %s 500w, %s 2000w',
            $item->artworkUrl60,
            $item->artworkUrl100,
            $artworkUrl500,
            $artworkUrl2000
        ));
        $escapedSrc = htmlspecialchars($artworkUrl2000);
        $escapedCollectionName = htmlspecialchars($item->collectionName);
        $escapedArtistUrl = htmlspecialchars($artist->artistLinkUrl);
        $escapedArtistName = htmlspecialchars($item->artistName);
        $escapedCopyright = htmlspecialchars($item->copyright ?? '');

        $this->items[] = [
            'title' => $item->collectionName,
            'uri' => $item->collectionViewUrl,
            'timestamp' => $item->releaseDate,
            // Enclosures are a list of URLs, not a single string.
            'enclosures' => [$artworkUrl500],
            'author' => $item->artistName,
            'content' => "<figure>
    <img srcset=\"$srcset\"
        sizes=\"100%\" src=\"$escapedSrc\"
        alt=\"Cover of $escapedCollectionName\"
        style=\"display: block; margin: 0 auto;\" />
    <figcaption>
        from <a href=\"$escapedArtistUrl\">$escapedArtistName</a><br />$escapedCopyright
    </figcaption>
</figure>",
        ];
    }
}
|
||||
|
||||
private function getJson()
|
||||
{
|
||||
# Limit the amount of releases to 50
|
||||
if ($this->getInput('limit') > 50) {
|
||||
@ -29,29 +65,53 @@ class AppleMusicBridge extends BridgeAbstract
|
||||
$limit = $this->getInput('limit');
|
||||
}
|
||||
|
||||
$url = 'https://itunes.apple.com/lookup?id='
|
||||
. $this->getInput('artist')
|
||||
. '&entity=album&limit='
|
||||
. $limit .
|
||||
'&sort=recent';
|
||||
$url = 'https://itunes.apple.com/lookup?id=' . $this->getInput('artist') . '&entity=album&limit=' . $limit . '&sort=recent';
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
|
||||
$json = json_decode($html);
|
||||
$result = $json->results;
|
||||
|
||||
foreach ($json->results as $obj) {
|
||||
if ($obj->wrapperType === 'collection') {
|
||||
$this->items[] = [
|
||||
'title' => $obj->artistName . ' - ' . $obj->collectionName,
|
||||
'uri' => $obj->collectionViewUrl,
|
||||
'timestamp' => $obj->releaseDate,
|
||||
'enclosures' => $obj->artworkUrl100,
|
||||
'content' => '<a href=' . $obj->collectionViewUrl
|
||||
. '><img src="' . $obj->artworkUrl100 . '" /></a><br><br>'
|
||||
. $obj->artistName . ' - ' . $obj->collectionName
|
||||
. '<br>'
|
||||
. $obj->copyright,
|
||||
];
|
||||
}
|
||||
if (!is_array($result) || count($result) == 0) {
|
||||
returnServerError('There is no artist with id "' . $this->getInput('artist') . '".');
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
 * Pick the artist record out of the lookup results.
 *
 * @param array $json Result objects, each carrying a wrapperType property.
 * @return object The single 'artist' entry; when there is not exactly one, a
 *                placeholder object whose artistName is the bridge's default
 *                name and whose artistLinkUrl is empty.
 */
private function getArtist($json)
{
    // array_filter() preserves keys, so reindex before reading element 0.
    $artists = array_values(array_filter($json, static function ($obj) {
        return $obj->wrapperType == 'artist';
    }));

    if (count($artists) === 1) {
        return $artists[0];
    }

    // The caller reads ->artistName and ->artistLinkUrl, so the fallback must
    // be an object exposing both rather than a plain string.
    return (object) [
        'artistName' => parent::getName(),
        'artistLinkUrl' => '',
    ];
}
|
||||
|
||||
/**
 * Feed name: the title stored by collectData() when set, otherwise the bridge's default name.
 */
public function getName()
{
    return $this->title ?? parent::getName();
}
|
||||
|
||||
/**
 * Feed icon: the og:image of the artist's Apple Music page with its size
 * segment rewritten, or the default icon when no artist id is given.
 */
public function getIcon()
{
    $artistId = $this->getInput('artist');
    if (empty($artistId)) {
        return parent::getIcon();
    }

    // it isn't necessary to set the correct artist name into the url
    $page = getSimpleHTMLDOMCached('https://music.apple.com/us/artist/jon-bellion/' . $artistId);
    $ogImage = $page->find('meta[property="og:image"]', 0)->content;

    return preg_replace('/\/\d*x\d*cw/i', '/144x144-999', $ogImage);
}
|
||||
}
|
||||
|
118
bridges/ArsTechnicaBridge.php
Normal file
118
bridges/ArsTechnicaBridge.php
Normal file
@ -0,0 +1,118 @@
|
||||
<?php
|
||||
|
||||
class ArsTechnicaBridge extends FeedExpander
|
||||
{
|
||||
const MAINTAINER = 'phantop';
|
||||
const NAME = 'Ars Technica';
|
||||
const URI = 'https://arstechnica.com/';
|
||||
const DESCRIPTION = 'Returns the latest articles from Ars Technica';
|
||||
const PARAMETERS = [[
|
||||
'section' => [
|
||||
'name' => 'Site section',
|
||||
'type' => 'list',
|
||||
'defaultValue' => 'index',
|
||||
'values' => [
|
||||
'All' => 'index',
|
||||
'Apple' => 'apple',
|
||||
'Board Games' => 'cardboard',
|
||||
'Cars' => 'cars',
|
||||
'Features' => 'features',
|
||||
'Gaming' => 'gaming',
|
||||
'Information Technology' => 'technology-lab',
|
||||
'Science' => 'science',
|
||||
'Staff Blogs' => 'staff-blogs',
|
||||
'Tech Policy' => 'tech-policy',
|
||||
'Tech' => 'gadgets',
|
||||
]
|
||||
]
|
||||
]];
|
||||
|
||||
/**
 * Expand the feed of the selected site section, passing 10 as the second
 * argument of collectExpandableDatas().
 */
public function collectData()
{
    $section = $this->getInput('section');
    $feedUrl = 'https://feeds.arstechnica.com/arstechnica/' . $section;

    $this->collectExpandableDatas($feedUrl, 10);
}
|
||||
|
||||
/**
 * Replace a feed item's content with the full article markup.
 *
 * Fetches the article page, rebuilds the content from the header (lead
 * paragraph, intro image) and every `.post-content` section, cleans up
 * galleries, ads, tables of contents and iframes, and fills categories and
 * uid from the page's `parsely-page` metadata.
 *
 * @param array $item Feed item; only its 'uri' entry is read here.
 * @return array The same item with 'content', 'categories' and 'uid' set.
 */
protected function parseItem(array $item)
{
    $item_html = getSimpleHTMLDOMCached($item['uri']);
    $item_html = defaultLinkTo($item_html, self::URI);

    // Start with the header: lead paragraph, then intro image with optional caption.
    $content = '';
    $header = $item_html->find('article header', 0);
    $leading = $header->find('p[class*=leading]', 0);
    if ($leading != null) {
        $content .= '<p>' . $leading->innertext . '</p>';
    }
    $intro_image = $header->find('img.intro-image', 0);
    if ($intro_image != null) {
        $content .= '<figure>' . $intro_image;

        $image_caption = $header->find('.caption .caption-content', 0);
        if ($image_caption != null) {
            $content .= '<figcaption>' . $image_caption->innertext . '</figcaption>';
        }
        $content .= '</figure>';
    }

    // Append the body of every content section found on the page.
    foreach ($item_html->find('.post-content') as $content_tag) {
        $content .= $content_tag->innertext;
    }

    // Re-parse the assembled markup so the clean-up below works on a DOM.
    $item['content'] = str_get_html($content);

    // Article metadata is stored as entity-encoded JSON in the parsely-page element's content attribute.
    $parsely = $item_html->find('[name="parsely-page"]', 0);
    $parsely_json = json_decode(html_entity_decode($parsely->content), true);
    $item['categories'] = $parsely_json['tags'];

    // Some lightboxes are nested in figures. I'd guess that's a
    // bug in the website
    foreach ($item['content']->find('figure div div.ars-lightbox') as $weird_lightbox) {
        // Replace the lightbox's grandparent with the lightbox itself.
        $weird_lightbox->parent->parent->outertext = $weird_lightbox;
    }

    // It's easier to reconstruct the whole thing than remove
    // duplicate reactive tags
    foreach ($item['content']->find('.ars-lightbox') as $lightbox) {
        $lightbox_content = '';
        foreach ($lightbox->find('.ars-lightbox-item') as $lightbox_item) {
            $img = $lightbox_item->find('img', 0);
            if ($img != null) {
                $lightbox_content .= '<figure>' . $img;
                $caption = $lightbox_item->find('div.pswp-caption-content', 0);
                if ($caption != null) {
                    $credit = $lightbox_item->find('div.ars-gallery-caption-credit', 0);
                    if ($credit != null) {
                        // Prefix the credit text before the caption's markup is copied below.
                        $credit->innertext = 'Credit: ' . $credit->innertext;
                    }
                    $lightbox_content .= '<figcaption>' . $caption->innertext . '</figcaption>';
                }
                $lightbox_content .= '</figure>';
            }
        }
        $lightbox->innertext = $lightbox_content;
    }

    // remove various ars advertising
    foreach ($item['content']->find('.ars-interlude-container') as $ad) {
        $ad->remove();
    }
    foreach ($item['content']->find('.toc-container') as $toc) {
        $toc->remove();
    }

    // Mostly YouTube videos
    $iframes = $item['content']->find('iframe');
    foreach ($iframes as $iframe) {
        // Replace each iframe with a plain link to its source.
        $iframe->outertext = '<a href="' . $iframe->src . '">' . $iframe->src . '</a>';
    }
    // This fixed padding around the former iframes and actual inline videos
    foreach ($item['content']->find('div[style*=aspect-ratio]') as $styled) {
        $styled->removeAttribute('style');
    }

    $item['content'] = backgroundToImg($item['content']);
    $item['uid'] = strval($parsely_json['post_id']);
    return $item;
}
|
||||
}
|
@ -156,6 +156,10 @@ class Arte7Bridge extends BridgeAbstract
|
||||
. $element['mainImage']['url']
|
||||
. '" /></a>';
|
||||
|
||||
$item['itunes'] = [
|
||||
'duration' => $durationSeconds,
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
@ -45,7 +45,6 @@ class AsahiShimbunAJWBridge extends BridgeAbstract
|
||||
|
||||
foreach ($html->find('#MainInner li a') as $element) {
|
||||
if ($element->parent()->class == 'HeadlineTopImage-S') {
|
||||
Debug::log('Skip Headline, it is repeated below');
|
||||
continue;
|
||||
}
|
||||
$item = [];
|
||||
|
@ -37,7 +37,8 @@ class AskfmBridge extends BridgeAbstract
|
||||
|
||||
$item['timestamp'] = strtotime($element->find('time', 0)->datetime);
|
||||
|
||||
$answer = trim($element->find('div.streamItem_content', 0)->innertext);
|
||||
$var = $element->find('div.streamItem_content', 0);
|
||||
$answer = trim($var->innertext ?? '');
|
||||
|
||||
// This probably should be cleaned up, especially for YouTube embeds
|
||||
if ($visual = $element->find('div.streamItem_visual', 0)) {
|
||||
|
@ -105,8 +105,7 @@ class AssociatedPressNewsBridge extends BridgeAbstract
|
||||
|
||||
private function collectCardData()
|
||||
{
|
||||
$json = getContents($this->getTagURI())
|
||||
or returnServerError('Could not request: ' . $this->getTagURI());
|
||||
$json = getContents($this->getTagURI());
|
||||
|
||||
$tagContents = json_decode($json, true);
|
||||
|
||||
|
@ -30,6 +30,9 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
// this bridge is broken and unmaintained
|
||||
return;
|
||||
|
||||
$uri = self::URI . '/monair/commune/' . $this->getInput('cities');
|
||||
|
||||
$html = getSimpleHTMLDOM($uri);
|
||||
|
344
bridges/AuctionetBridge.php
Normal file
344
bridges/AuctionetBridge.php
Normal file
@ -0,0 +1,344 @@
|
||||
<?php
|
||||
|
||||
class AuctionetBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Auctionet';
|
||||
const URI = 'https://www.auctionet.com';
|
||||
const DESCRIPTION = 'Fetches info about auction objects from Auctionet (an auction platform for many European auction houses)';
|
||||
const MAINTAINER = 'Qluxzz';
|
||||
const PARAMETERS = [[
|
||||
'category' => [
|
||||
'name' => 'Category',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'All categories' => '',
|
||||
'Art' => [
|
||||
'All' => '25-art',
|
||||
'Drawings' => '119-drawings',
|
||||
'Engravings & Prints' => '27-engravings-prints',
|
||||
'Other' => '30-other',
|
||||
'Paintings' => '28-paintings',
|
||||
'Photography' => '26-photography',
|
||||
'Sculptures & Bronzes' => '29-sculptures-bronzes',
|
||||
],
|
||||
'Asiatica' => [
|
||||
'All' => '117-asiatica',
|
||||
],
|
||||
'Books, Maps & Manuscripts' => [
|
||||
'All' => '50-books-maps-manuscripts',
|
||||
'Autographs & Manuscripts' => '206-autographs-manuscripts',
|
||||
'Books' => '204-books',
|
||||
'Maps' => '205-maps',
|
||||
'Other' => '207-other',
|
||||
],
|
||||
'Carpets & Textiles' => [
|
||||
'All' => '35-carpets-textiles',
|
||||
'Carpets' => '36-carpets',
|
||||
'Textiles' => '37-textiles',
|
||||
],
|
||||
'Ceramics & Porcelain' => [
|
||||
'All' => '9-ceramics-porcelain',
|
||||
'European' => '10-european',
|
||||
'Oriental' => '11-oriental',
|
||||
'Rest of the world' => '12-rest-of-the-world',
|
||||
'Tableware' => '210-tableware',
|
||||
],
|
||||
'Clocks & Watches' => [
|
||||
'All' => '31-clocks-watches',
|
||||
'Carriage & Miniature Clocks' => '258-carriage-miniature-clocks',
|
||||
'Longcase clocks' => '32-longcase-clocks',
|
||||
'Mantel clocks' => '33-mantel-clocks',
|
||||
'Other clocks' => '34-other-clocks',
|
||||
'Pocket & Stop Watches' => '110-pocket-stop-watches',
|
||||
'Wall Clocks' => '127-wall-clocks',
|
||||
'Wristwatches' => '15-wristwatches',
|
||||
],
|
||||
'Coins, Medals & Stamps' => [
|
||||
'All' => '46-coins-medals-stamps',
|
||||
'Coins' => '128-coins',
|
||||
'Orders & Medals' => '135-orders-medals',
|
||||
'Other' => '131-other',
|
||||
'Stamps' => '136-stamps',
|
||||
],
|
||||
'Folk art' => [
|
||||
'All' => '58-folk-art',
|
||||
'Bowls & Boxes' => '121-bowls-boxes',
|
||||
'Furniture' => '122-furniture',
|
||||
'Other' => '123-other',
|
||||
'Tools & Gears' => '120-tools-gears',
|
||||
],
|
||||
'Furniture' => [
|
||||
'All' => '16-furniture',
|
||||
'Armchairs & Chairs' => '18-armchairs-chairs',
|
||||
'Chests of drawers' => '24-chests-of-drawers',
|
||||
'Cupboards, Cabinets & Shelves' => '23-cupboards-cabinets-shelves',
|
||||
'Dining room furniture' => '22-dining-room-furniture',
|
||||
'Garden' => '21-garden',
|
||||
'Other' => '17-other',
|
||||
'Sofas & seatings' => '20-sofas-seatings',
|
||||
'Tables' => '19-tables',
|
||||
],
|
||||
'Glass' => [
|
||||
'All' => '6-glass',
|
||||
'Art glass' => '208-art-glass',
|
||||
'Other' => '8-other',
|
||||
'Tableware' => '7-tableware',
|
||||
'Utility glass' => '209-utility-glass',
|
||||
],
|
||||
'Jewellery & Gemstones' => [
|
||||
'All' => '13-jewellery-gemstones',
|
||||
'Alliance rings' => '113-alliance-rings',
|
||||
'Bracelets' => '106-bracelets',
|
||||
'Brooches & Pendants' => '107-brooches-pendants',
|
||||
'Costume Jewellery' => '259-costume-jewellery',
|
||||
'Cufflinks & Tie Pins' => '111-cufflinks-tie-pins',
|
||||
'Ear studs' => '116-ear-studs',
|
||||
'Earrings' => '115-earrings',
|
||||
'Gemstones' => '48-gemstones',
|
||||
'Jewellery' => '14-jewellery',
|
||||
'Jewellery Suites' => '109-jewellery-suites',
|
||||
'Necklace' => '104-necklace',
|
||||
'Other' => '118-other',
|
||||
'Rings' => '112-rings',
|
||||
'Signet rings' => '105-signet-rings',
|
||||
'Solitaire rings' => '114-solitaire-rings',
|
||||
],
|
||||
'Licence weapons' => [
|
||||
'All' => '59-licence-weapons',
|
||||
'Combi/Combo' => '63-combi-combo',
|
||||
'Double express rifles' => '60-double-express-rifles',
|
||||
'Rifles' => '61-rifles',
|
||||
'Shotguns' => '62-shotguns',
|
||||
],
|
||||
'Lighting & Lamps' => [
|
||||
'All' => '1-lighting-lamps',
|
||||
'Candlesticks' => '4-candlesticks',
|
||||
'Ceiling lights' => '3-ceiling-lights',
|
||||
'Chandeliers' => '203-chandeliers',
|
||||
'Floor lights' => '2-floor-lights',
|
||||
'Other lighting' => '5-other-lighting',
|
||||
'Table Lamps' => '125-table-lamps',
|
||||
'Wall Lights' => '124-wall-lights',
|
||||
],
|
||||
'Mirrors' => [
|
||||
'All' => '42-mirrors',
|
||||
],
|
||||
'Miscellaneous' => [
|
||||
'All' => '43-miscellaneous',
|
||||
'Fishing equipment' => '54-fishing-equipment',
|
||||
'Miscellaneous' => '47-miscellaneous',
|
||||
'Modern Tools' => '133-modern-tools',
|
||||
'Modern consumer electronics' => '52-modern-consumer-electronics',
|
||||
'Musical instruments' => '51-musical-instruments',
|
||||
'Technica & Nautica' => '45-technica-nautica',
|
||||
],
|
||||
'Photo, Cameras & Lenses' => [
|
||||
'All' => '57-photo-cameras-lenses',
|
||||
'Cameras & accessories' => '71-cameras-accessories',
|
||||
'Optics' => '66-optics',
|
||||
'Other' => '72-other',
|
||||
],
|
||||
'Silver & Metals' => [
|
||||
'All' => '38-silver-metals',
|
||||
'Other metals' => '40-other-metals',
|
||||
'Pewter, Brass & Copper' => '41-pewter-brass-copper',
|
||||
'Silver' => '39-silver',
|
||||
'Silver plated' => '213-silver-plated',
|
||||
],
|
||||
'Toys' => [
|
||||
'All' => '44-toys',
|
||||
'Comics' => '211-comics',
|
||||
'Toys' => '212-toys',
|
||||
],
|
||||
'Tribal art' => [
|
||||
'All' => '134-tribal-art',
|
||||
],
|
||||
'Vehicles, Boats & Parts' => [
|
||||
'All' => '249-vehicles-boats-parts',
|
||||
'Automobilia & Transport' => '255-automobilia-transport',
|
||||
'Bicycles' => '132-bicycles',
|
||||
'Boats & Accessories' => '250-boats-accessories',
|
||||
'Car parts' => '253-car-parts',
|
||||
'Cars' => '215-cars',
|
||||
'Moped parts' => '254-moped-parts',
|
||||
'Mopeds' => '216-mopeds',
|
||||
'Motorcycle parts' => '252-motorcycle-parts',
|
||||
'Motorcycles' => '251-motorcycles',
|
||||
'Other' => '256-other',
|
||||
],
|
||||
'Vintage & Designer Fashion' => [
|
||||
'All' => '49-vintage-designer-fashion',
|
||||
],
|
||||
'Weapons & Militaria' => [
|
||||
'All' => '137-weapons-militaria',
|
||||
'Airguns' => '257-airguns',
|
||||
'Armour & Uniform' => '138-armour-uniform',
|
||||
'Edged weapons' => '130-edged-weapons',
|
||||
'Guns & Rifles' => '129-guns-rifles',
|
||||
'Other' => '214-other',
|
||||
],
|
||||
'Wine, Port & Spirits' => [
|
||||
'All' => '170-wine-port-spirits',
|
||||
],
|
||||
]
|
||||
],
|
||||
'sort_order' => [
|
||||
'name' => 'Sort order',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Most bids' => 'bids_count_desc',
|
||||
'Lowest bid' => 'bid_asc',
|
||||
'Highest bid' => 'bid_desc',
|
||||
'Last bid on' => 'bid_on',
|
||||
'Ending soonest' => 'end_asc_active',
|
||||
'Lowest estimate' => 'estimate_asc',
|
||||
'Highest estimate' => 'estimate_desc',
|
||||
'Recently added' => 'recent'
|
||||
],
|
||||
],
|
||||
'country' => [
|
||||
'name' => 'Country',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'All' => '',
|
||||
'Denmark' => 'DK',
|
||||
'Finland' => 'FI',
|
||||
'Germany' => 'DE',
|
||||
'Spain' => 'ES',
|
||||
'Sweden' => 'SE',
|
||||
'United Kingdom' => 'GB'
|
||||
]
|
||||
],
|
||||
'language' => [
|
||||
'name' => 'Language',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'English' => 'en',
|
||||
'Español' => 'es',
|
||||
'Deutsch' => 'de',
|
||||
'Svenska' => 'sv',
|
||||
'Dansk' => 'da',
|
||||
'Suomi' => 'fi',
|
||||
],
|
||||
],
|
||||
]];
|
||||
|
||||
const CACHE_TIMEOUT = 3600; // 1 hour
|
||||
|
||||
private $title;
|
||||
|
||||
/**
 * Fetch the first 10 result pages and collect their auctions.
 *
 * Each page contains 48 auctions, so fetching 10 pages decreases the
 * likelihood of missing auctions between feed refreshes. The first page is
 * also used to obtain the feed title.
 */
public function collectData()
{
    for ($page = 1; $page <= 10; $page++) {
        $data = getContents($this->getUrl($page));

        if ($page === 1) {
            // Stored on the instance so getName() can return it.
            $this->title = $this->getDocumentTitle($data);
        }

        $this->items = array_merge($this->items, $this->parsePageData($data));
    }
}

/**
 * Feed name: the title of the first result page, or the bridge name when no
 * title has been collected (or it is empty).
 */
public function getName()
{
    return $this->title ?: parent::getName();
}


/* HELPERS */

/**
 * Build the search URL for one result page from the bridge inputs.
 *
 * @param int $page Result page number, sent as the `page` query parameter.
 * @return string Search URL; the category path segment and the `order` /
 *                `country_code` query parameters are included only when the
 *                corresponding input is non-empty.
 */
private function getUrl($page)
{
    $category = $this->getInput('category');
    $language = $this->getInput('language');
    $sort_order = $this->getInput('sort_order');
    $country = $this->getInput('country');

    $url = self::URI . '/' . $language . '/search';

    if ($category) {
        $url = $url . '/' . $category;
    }

    $query = ['page' => $page];

    if ($sort_order) {
        $query['order'] = $sort_order;
    }

    if ($country) {
        $query['country_code'] = $country;
    }

    // The query always contains at least `page`.
    return $url . '?' . http_build_query($query);
}

/**
 * Extract the text between <title> and </title> from an HTML document.
 *
 * @param string $data Raw HTML.
 * @return string Trimmed, entity-decoded title; '' when no complete title
 *                element is found.
 */
private function getDocumentTitle($data)
{
    $title_elem = '<title>';

    $title_start = strpos($data, $title_elem);
    if ($title_start === false) {
        return '';
    }
    $title_start += strlen($title_elem);

    $title_end = strpos($data, '</title>', $title_start);
    if ($title_end === false) {
        return '';
    }

    // Length is the distance between the end of the opening tag and the
    // start of the closing tag.
    $title = substr($data, $title_start, $title_end - $title_start);

    return trim(html_entity_decode($title, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
}
|
||||
|
||||
/**
 * The auction items data is included in the HTML document
 * as a HTML entities encoded JSON structure
 * which is used to hydrate the React component for the list of auctions.
 *
 * Only the first `data-react-props` attribute of the document is read.
 *
 * @param string $data Raw HTML of a search result page.
 * @return array Feed items (title, uri, uid, content, enclosures); empty when
 *               the attribute is missing, unterminated, or does not decode to
 *               an object with an iterable `items` property.
 */
private function parsePageData($data)
{
    $key = 'data-react-props="';

    $start = strpos($data, $key);
    if ($start === false) {
        return [];
    }
    $start += strlen($key);

    $end = strpos($data, '"', $start);
    if ($end === false) {
        return [];
    }

    $jsonString = substr($data, $start, $end - $start);
    $jsonData = json_decode(htmlspecialchars_decode($jsonString), false);

    if (!is_object($jsonData) || !isset($jsonData->items) || !is_iterable($jsonData->items)) {
        return [];
    }

    $items = [];

    foreach ($jsonData->items as $item) {
        $title = $item->longTitle;
        $images = $item->imageUrls ?? [];

        $items[] = [
            'title' => $title,
            'uri' => self::URI . $item->url,
            'uid' => $item->auctionId,
            // The first image is shown inline; the remaining ones become enclosures.
            'content' => count($images) > 0 ? "<img src='$images[0]'/><br/>$title" : $title,
            'enclosures' => array_slice($images, 1),
        ];
    }

    return $items;
}
|
||||
}
|
@ -13,12 +13,20 @@ class AutoJMBridge extends BridgeAbstract
|
||||
'type' => 'text',
|
||||
'required' => true,
|
||||
'title' => 'URL d\'une recherche avec filtre de véhicules sans le http://www.autojm.fr/',
|
||||
'exampleValue' => 'recherche?brands[]=peugeot&ranges[]=peugeot-nouvelle-308-2021-5p'
|
||||
'exampleValue' => 'recherche?brands[]=PEUGEOT&ranges[]=PEUGEOT 308'
|
||||
],
|
||||
]
|
||||
];
|
||||
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
|
||||
const TEST_DETECT_PARAMETERS = [
|
||||
'https://www.autojm.fr/recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308'
|
||||
=> ['url' => 'recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308',
|
||||
'context' => 'Afficher les offres de véhicules disponible sur la recheche AutoJM'
|
||||
]
|
||||
];
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return self::URI . 'favicon.ico';
|
||||
@ -35,6 +43,17 @@ class AutoJMBridge extends BridgeAbstract
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * URL of the queried search when the search context is active, otherwise the site root.
 */
public function getURI()
{
    if ($this->queriedContext == 'Afficher les offres de véhicules disponible sur la recheche AutoJM') {
        return self::URI . $this->getInput('url');
    }

    return self::URI;
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
// Get the number of result for this search
|
||||
@ -52,7 +71,7 @@ class AutoJMBridge extends BridgeAbstract
|
||||
$data = json_decode($json);
|
||||
|
||||
$nb_results = $data->nbResults;
|
||||
$total_pages = ceil($nb_results / 15);
|
||||
$total_pages = ceil($nb_results / 14);
|
||||
|
||||
// Limit the number of page to analyse to 10
|
||||
for ($page = 1; $page <= $total_pages && $page <= 10; $page++) {
|
||||
@ -66,8 +85,8 @@ class AutoJMBridge extends BridgeAbstract
|
||||
$image = $car->find('div[class=card-car__header__img]', 0)->find('img', 0)->src;
|
||||
// Decode HTML attribute JSON data
|
||||
$car_data = json_decode(html_entity_decode($car->{'data-layer'}));
|
||||
$car_model = $car->{'data-title'} . ' ' . $car->{'data-suptitle'};
|
||||
$availability = $car->find('div[class=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext;
|
||||
$car_model = $car_data->title;
|
||||
$availability = $car->find('div[class*=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext;
|
||||
$warranty = $car->find('div[data-type=WarrantyCard]', 0)->plaintext;
|
||||
$discount_html = $car->find('div[class=subtext vehicle_reference_element]', 0);
|
||||
// Check if there is any discount info displayed
|
||||
@ -132,4 +151,18 @@ class AutoJMBridge extends BridgeAbstract
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
 * Derive the bridge parameters from an AutoJM search URL.
 *
 * Two URL shapes are recognised: ".../recherche?<query>" and
 * ".../recherche/<page>?<query>". The optional numeric page segment is
 * removed so that the resulting `url` input always starts with "recherche?".
 *
 * @param string $url URL to inspect
 * @return array|null Parameters (`url`, `context`) or null when the URL is not an AutoJM search
 */
public function detectParameters($url)
{
    $regex = '/^(https?:\/\/)?(www\.|)autojm\.fr\/(recherche\?.*|recherche\/[0-9]{1,10}\?.*)$/m';
    if (preg_match($regex, $url, $matches) !== 1) {
        return null;
    }

    // Anchored, with the page segment optional: the former alternation
    // "(recherche|recherche/N)" always matched the bare word first and
    // therefore never removed "/N".
    $search = preg_replace('#^recherche(?:/[0-9]{1,10})?#', 'recherche', $matches[3]);

    return [
        'url' => $search,
        'context' => 'Afficher les offres de véhicules disponible sur la recheche AutoJM',
    ];
}
|
||||
}
|
||||
|
@ -14,29 +14,10 @@ class AwwwardsBridge extends BridgeAbstract
|
||||
|
||||
private $sites = [];
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://www.awwwards.com/favicon.ico';
|
||||
}
|
||||
|
||||
private function fetchSites()
|
||||
{
|
||||
Debug::log('Fetching all sites');
|
||||
$sites = getSimpleHTMLDOM(self::SITESURI);
|
||||
|
||||
Debug::log('Parsing all JSON data');
|
||||
foreach ($sites->find('.grid-sites li') as $site) {
|
||||
$decode = html_entity_decode($site->attr['data-collectable-model-value'], ENT_QUOTES, 'utf-8');
|
||||
$decode = json_decode($decode, true);
|
||||
$this->sites[] = $decode;
|
||||
}
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->fetchSites();
|
||||
|
||||
Debug::log('Building RSS feed');
|
||||
foreach ($this->sites as $site) {
|
||||
$item = [];
|
||||
$item['title'] = $site['title'];
|
||||
@ -56,4 +37,23 @@ class AwwwardsBridge extends BridgeAbstract
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://www.awwwards.com/favicon.ico';
|
||||
}
|
||||
|
||||
private function fetchSites()
|
||||
{
|
||||
$sites = getSimpleHTMLDOM(self::SITESURI);
|
||||
foreach ($sites->find('.grid-sites li') as $li) {
|
||||
$encodedJson = $li->attr['data-collectable-model-value'] ?? null;
|
||||
if (!$encodedJson) {
|
||||
continue;
|
||||
}
|
||||
$json = html_entity_decode($encodedJson, ENT_QUOTES, 'utf-8');
|
||||
$site = Json::decode($json);
|
||||
$this->sites[] = $site;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ class BAEBridge extends BridgeAbstract
|
||||
public function collectData()
|
||||
{
|
||||
$url = $this->getURI();
|
||||
$html = getSimpleHTMLDOM($url) or returnClientError('No results for this query.');
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
|
||||
$annonces = $html->find('main article');
|
||||
foreach ($annonces as $annonce) {
|
||||
|
254
bridges/BMDSystemhausBlogBridge.php
Normal file
254
bridges/BMDSystemhausBlogBridge.php
Normal file
@ -0,0 +1,254 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge for the BMD Systemhaus blog / news listing pages.
 *
 * One listing page exists per supported country. Every entry of the listing
 * becomes a feed item whose content is rendered with a selectable layout
 * (image/content arrangement).
 */
class BMDSystemhausBlogBridge extends BridgeAbstract
{
    const MAINTAINER = 'cn-tools';
    const NAME = 'BMD SYSTEMHAUS GesmbH';
    const CACHE_TIMEOUT = 21600; //6h
    const URI = 'https://www.bmd.com';
    const DONATION_URI = 'https://paypal.me/cntools';
    const DESCRIPTION = 'BMD Systemhaus - We make business easy';
    const BMD_FAV_ICON = 'https://www.bmd.com/favicon.ico';

    // Content templates keyed by the value of the "style" input.
    // {data_img} and {data_content} are substituted per item.
    const ITEMSTYLE = [
        'ilcr' => '<table width="100%"><tr><td style="vertical-align: top;">{data_img}</td><td style="vertical-align: top;">{data_content}</td></tr></table>',
        'clir' => '<table width="100%"><tr><td style="vertical-align: top;">{data_content}</td><td style="vertical-align: top;">{data_img}</td></tr></table>',
        'itcb' => '<div>{data_img}<br />{data_content}</div>',
        'ctib' => '<div>{data_content}<br />{data_img}</div>',
        'co' => '{data_content}',
        'io' => '{data_img}'
    ];

    const PARAMETERS = [
        'Blog' => [
            'country' => [
                'name' => 'Country',
                'type' => 'list',
                'values' => [
                    'Österreich' => 'at',
                    'Deutschland' => 'de',
                    'Schweiz' => 'ch',
                    'Slovensko' => 'sk',
                    'Cesko' => 'cz',
                    'Hungary' => 'hu',
                ],
                'defaultValue' => 'at',
            ],
            'style' => [
                'name' => 'Style',
                'type' => 'list',
                'values' => [
                    'Image left, content right' => 'ilcr',
                    'Content left, image right' => 'clir',
                    'Image top, content bottom' => 'itcb',
                    'Content top, image bottom' => 'ctib',
                    'Content only' => 'co',
                    'Image only' => 'io',
                ],
                'defaultValue' => 'ilcr',
            ]
        ]
    ];

    // Blog listing page per lower-case country code.
    private const COUNTRY_URIS = [
        'at' => 'https://www.bmd.com/at/ueber-bmd/blog-ohne-filter.html',
        'de' => 'https://www.bmd.com/de/das-ist-bmd/blog.html',
        'ch' => 'https://www.bmd.com/ch/das-ist-bmd/blog.html',
        'sk' => 'https://www.bmd.com/sk/firma/blog.html',
        'cz' => 'https://www.bmd.com/cz/firma/news-blog.html',
        'hu' => 'https://www.bmd.com/hu/rolunk/hirek.html',
    ];

    //-----------------------------------------------------
    /**
     * Fetch the listing page and turn every news entry into a feed item.
     *
     * Entries without title, content or link are skipped.
     */
    public function collectData()
    {
        // get website content
        $html = getSimpleHTMLDOM($this->getURI());

        // Convert relative links in HTML into absolute links
        $html = defaultLinkTo($html, self::URI);

        // Convert lazy-loading images and frames (video embeds) into static elements
        $html = convertLazyLoading($html);

        // The template does not depend on the entry, so resolve it once.
        // Falls back to the declared default style when the input is absent or unknown.
        $template = self::ITEMSTYLE[$this->getInput('style')] ?? self::ITEMSTYLE['ilcr'];

        foreach ($html->find('div#bmdNewsList div#bmdNewsList-Item') as $element) {
            $itemScope = $element->find('div[itemscope=itemscope]', 0);
            if (is_null($itemScope)) {
                // Without the metadata container there is no title, so the
                // entry could never pass the completeness check below.
                continue;
            }

            $item = [];

            // set base article data
            $item['title'] = $this->getMetaItemPropContent($itemScope, 'headline');
            $item['timestamp'] = strtotime($this->getMetaItemPropContent($itemScope, 'datePublished'));
            $item['author'] = $this->getMetaItemPropContent($itemScope->find('div[itemprop=author]', 0), 'name');

            // find article image
            $imageTag = '';
            $image = $element->find('div.mediaelement.mediaelement-image img', 0);
            if (!is_null($image) && $image->src != '') {
                $item['enclosures'] = [$image->src];
                $imageTag = '<img src="' . $image->src . '"/>';
            }

            // render the placeholders of the selected template
            $item['content'] = str_replace(
                ['{data_content}', '{data_img}'],
                [$this->getMetaItemPropContent($itemScope, 'description'), $imageTag],
                $template
            );

            // get link to article
            $link = $element->find('div#bmdNewsList-Text div#bmdNewsList-Title a', 0);
            $item['uri'] = is_null($link) ? '' : $link->href;

            // categories come from the first category span, if there is one
            $catElem = $element->find('div#bmdNewsList-Text div#bmdNewsList-Category span.news-list-category', 0);
            if (!is_null($catElem)) {
                $categories = $this->parseCategories($catElem->innertext);
                if (count($categories) > 0) {
                    $item['categories'] = $categories;
                }
            }

            // add item only when it is complete
            if ($item['title'] != '' && $item['content'] != '' && $item['uri'] != '') {
                $this->items[] = $item;
            }
        }
    }

    //-----------------------------------------------------
    /**
     * Derive the "country" parameter from a bmd.com URL.
     *
     * The first path segment is used when it is a supported country code.
     * Otherwise the browser's Accept-Language header is consulted, and
     * finally "at" is used as the default.
     *
     * @param string $url
     * @return array|null Parameters, or null when the URL is invalid or not on bmd.com
     */
    public function detectParameters($url)
    {
        try {
            $parsedUrl = Url::fromString($url);
        } catch (UrlException $e) {
            return null;
        }

        if (!in_array($parsedUrl->getHost(), ['www.bmd.com', 'bmd.com'])) {
            return null;
        }

        $lang = '';

        // extract language from url
        $path = explode('/', $parsedUrl->getPath());
        if (count($path) > 1 && $this->getURIbyCountry($path[1]) != '') {
            $lang = $path[1];
        }

        // if no country available, find language by browser
        if ($lang == '') {
            $lang = $this->detectCountryFromAcceptLanguage($_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '');
        }

        // if no URL found by language, use AT as default
        if ($lang == '') {
            $lang = 'at';
        }

        return ['country' => strtolower($lang)];
    }

    //-----------------------------------------------------
    /**
     * @return string Listing page of the selected country, or the parent's URI when none is selected
     */
    public function getURI()
    {
        $country = $this->getInput('country') ?? '';
        $lURI = $this->getURIbyCountry($country);
        return $lURI != '' ? $lURI : parent::getURI();
    }

    //-----------------------------------------------------
    public function getIcon()
    {
        return self::BMD_FAV_ICON;
    }

    //-----------------------------------------------------
    /**
     * Read the `content` attribute of the first `<meta itemprop="$key">` below $elem.
     *
     * @param object|null $elem DOM node to search in (null is tolerated)
     * @param string $key itemprop name
     * @return mixed The attribute value, or '' when $elem, $key or the meta element is missing
     */
    private function getMetaItemPropContent($elem, $key)
    {
        if ($key == '' || is_null($elem)) {
            return '';
        }

        $metaElem = $elem->find('meta[itemprop=' . $key . ']', 0);
        if (is_null($metaElem)) {
            return '';
        }

        return $metaElem->getAttribute('content');
    }

    //-----------------------------------------------------
    /**
     * @param string $country Country code, case-insensitive
     * @return string Listing page URL, or '' for an unsupported country
     */
    private function getURIbyCountry($country)
    {
        return self::COUNTRY_URIS[strtolower((string) $country)] ?? '';
    }

    //-----------------------------------------------------
    /**
     * Split the raw text of the category span into individual category names.
     *
     * The text is split on "/", then on the space pattern, then on tab runs;
     * every piece is trimmed and empty pieces are dropped.
     *
     * @param string $text
     * @return string[] Re-indexed list of non-empty category names
     */
    private function parseCategories(string $text): array
    {
        $pieces = explode('/', trim($text));

        // NOTE(review): the original comment said "split by 2 spaces", but the
        // pattern as found matches a single space - confirm which is intended.
        $bySpace = [];
        foreach ($pieces as $piece) {
            $bySpace = array_merge($bySpace, preg_split('/ /', trim($piece)));
        }

        // split by tabulator
        $byTab = [];
        foreach ($bySpace as $piece) {
            $byTab = array_merge($byTab, preg_split('/\t+/', trim($piece)));
        }

        $byTab = array_map('trim', $byTab);

        return array_values(array_filter($byTab, static function ($value) {
            return !is_null($value) && $value !== '';
        }));
    }

    //-----------------------------------------------------
    /**
     * Pick the first supported country from an Accept-Language header value.
     *
     * Only the part before the first ";" is inspected. For a tag such as
     * "de-AT" the region ("AT") is tried, for a bare tag its first two
     * characters are tried.
     *
     * @param string $header Raw header value, may be empty
     * @return string Matching code as found in the header, or '' when none is supported
     */
    private function detectCountryFromAcceptLanguage(string $header): string
    {
        $firstGroup = explode(';', $header)[0];

        foreach (explode(',', $firstGroup) as $tag) {
            $tag = trim($tag);
            $tagParts = explode('-', $tag);

            // Each candidate is taken from the tag currently inspected, so
            // later tags are really tested.
            $candidate = count($tagParts) > 1 ? $tagParts[1] : (string) substr($tag, 0, 2);

            if ($this->getURIbyCountry($candidate) != '') {
                return $candidate;
            }
        }

        return '';
    }
}
|
@ -138,6 +138,7 @@ class BadDragonBridge extends BridgeAbstract
|
||||
// Sale
|
||||
$regex = '/^(https?:\/\/)?bad-dragon\.com\/sales/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Sales';
|
||||
return $params;
|
||||
}
|
||||
|
||||
@ -192,6 +193,7 @@ class BadDragonBridge extends BridgeAbstract
|
||||
isset($urlParams['noAccessories'])
|
||||
&& $urlParams['noAccessories'] === '1'
|
||||
&& $params['noAccessories'] = 'on';
|
||||
$params['context'] = 'Clearance';
|
||||
|
||||
return $params;
|
||||
}
|
||||
@ -282,8 +284,7 @@ class BadDragonBridge extends BridgeAbstract
|
||||
case 'Clearance':
|
||||
$toyData = json_decode(getContents($this->inputToURL(true)));
|
||||
|
||||
$productList = json_decode(getContents(self::URI
|
||||
. 'api/inventory-toy/product-list'));
|
||||
$productList = json_decode(getContents(self::URI . 'api/inventory-toy/product-list'));
|
||||
|
||||
foreach ($toyData->toys as $toy) {
|
||||
$item = [];
|
||||
|
@ -111,12 +111,12 @@ class BandcampBridge extends BridgeAbstract
|
||||
$url = self::URI . 'api/hub/1/dig_deeper';
|
||||
$data = $this->buildRequestJson();
|
||||
$header = [
|
||||
'Content-Type: application/json',
|
||||
'Content-Length: ' . strlen($data)
|
||||
'Content-Type: application/json',
|
||||
'Content-Length: ' . strlen($data),
|
||||
];
|
||||
$opts = [
|
||||
CURLOPT_CUSTOMREQUEST => 'POST',
|
||||
CURLOPT_POSTFIELDS => $data
|
||||
CURLOPT_CUSTOMREQUEST => 'POST',
|
||||
CURLOPT_POSTFIELDS => $data,
|
||||
];
|
||||
$content = getContents($url, $header, $opts);
|
||||
|
||||
@ -314,7 +314,8 @@ class BandcampBridge extends BridgeAbstract
|
||||
{
|
||||
$url = self::URI . 'api/' . $endpoint . '?' . http_build_query($query_data);
|
||||
// todo: 429 Too Many Requests happens a lot
|
||||
$data = json_decode(getContents($url));
|
||||
$response = getContents($url);
|
||||
$data = json_decode($response);
|
||||
return $data;
|
||||
}
|
||||
|
||||
@ -397,6 +398,7 @@ class BandcampBridge extends BridgeAbstract
|
||||
// By tag
|
||||
$regex = '/^(https?:\/\/)?bandcamp\.com\/tag\/([^\/.&?\n]+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'By tag';
|
||||
$params['tag'] = urldecode($matches[2]);
|
||||
return $params;
|
||||
}
|
||||
@ -404,6 +406,7 @@ class BandcampBridge extends BridgeAbstract
|
||||
// By band
|
||||
$regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'By band';
|
||||
$params['band'] = urldecode($matches[2]);
|
||||
return $params;
|
||||
}
|
||||
@ -411,6 +414,7 @@ class BandcampBridge extends BridgeAbstract
|
||||
// By album
|
||||
$regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com\/album\/([^\/.&?\n]+)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'By album';
|
||||
$params['band'] = urldecode($matches[2]);
|
||||
$params['album'] = urldecode($matches[3]);
|
||||
return $params;
|
||||
|
@ -93,8 +93,7 @@ class BandcampDailyBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->getURI())
|
||||
or returnServerError('Could not request: ' . $this->getURI());
|
||||
$html = getSimpleHTMLDOM($this->getURI());
|
||||
|
||||
$html = defaultLinkTo($html, self::URI);
|
||||
|
||||
@ -105,8 +104,7 @@ class BandcampDailyBridge extends BridgeAbstract
|
||||
|
||||
$articlePath = $article->find('a.title', 0)->href;
|
||||
|
||||
$articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600)
|
||||
or returnServerError('Could not request: ' . $articlePath);
|
||||
$articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600);
|
||||
|
||||
$item['uri'] = $articlePath;
|
||||
$item['title'] = $articlePageHtml->find('article-title', 0)->innertext;
|
||||
|
139
bridges/BazarakiBridge.php
Normal file
139
bridges/BazarakiBridge.php
Normal file
@ -0,0 +1,139 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge for Bazaraki listing pages.
 *
 * Reads the adverts of a user-supplied listing URL and fetches each advert
 * page to build the item content.
 */
class BazarakiBridge extends BridgeAbstract
{
    const NAME = 'Bazaraki Bridge';
    const URI = 'https://bazaraki.com';
    const DESCRIPTION = 'Fetch adverts from Bazaraki, a Cyprus-based classifieds website.';
    const MAINTAINER = 'danwain';
    const PARAMETERS = [
        [
            'url' => [
                'name' => 'URL',
                'type' => 'text',
                'required' => true,
                'title' => 'Enter the URL of the Bazaraki page to fetch adverts from.',
                'exampleValue' => 'https://www.bazaraki.com/real-estate-for-sale/houses/?lat=0&lng=0&radius=100000',
            ],
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => false,
                'title' => 'Enter the number of adverts to fetch. (max 50)',
                'exampleValue' => '10',
                'defaultValue' => 10,
            ]
        ]
    ];

    /**
     * Collect up to `limit` adverts (hard cap 50) from the listing page.
     *
     * Adverts without a title link are skipped. Optional fields (price,
     * date, author, advert number) are only used when present on the
     * advert page.
     *
     * @throws \Exception When the `url` input does not point at www.bazaraki.com
     */
    public function collectData()
    {
        $url = $this->getInput('url');
        if (!is_string($url) || !str_starts_with($url, 'https://www.bazaraki.com/')) {
            throw new \Exception('The URL must start with https://www.bazaraki.com/');
        }

        // A missing limit falls back to the declared default instead of
        // comparing against null (which ended the loop immediately).
        $limit = min((int) ($this->getInput('limit') ?? 10), 50);

        $html = getSimpleHTMLDOM($url);

        $seen = 0;
        foreach ($html->find('div.advert') as $element) {
            $seen++;
            if ($seen > $limit) {
                break;
            }

            $titleLink = $element->find('a.advert__content-title', 0);
            if (is_null($titleLink)) {
                continue;
            }

            $item = [];
            $item['uri'] = 'https://www.bazaraki.com' . $titleLink->href;

            # Get the content
            $advert = getSimpleHTMLDOM($item['uri']);

            $price = $this->plainTextOf($advert, 'div.announcement-price__cost');
            $name = trim($titleLink->plaintext);
            $item['title'] = $price === '' ? $name : $name . ' - ' . $price;

            $time = str_replace('Posted: ', '', $this->plainTextOf($advert, 'span.date-meta'));

            $item['content'] = $this->processAdvertContent($advert);
            if ($time !== '') {
                $item['timestamp'] = $this->convertRelativeTime($time);
            }

            $author = $this->plainTextOf($advert, 'div.author-name');
            if ($author !== '') {
                $item['author'] = $author;
            }

            // The uid is deliberately left untrimmed so item identities stay stable.
            $uidNode = $advert->find('span.number-announcement', 0);
            if (!is_null($uidNode)) {
                $item['uid'] = $uidNode->plaintext;
            }

            $this->items[] = $item;
        }
    }

    /**
     * Process the advert content to clean up HTML
     *
     * Favourites widgets are removed and links are replaced by their inner
     * markup before the sections are read, so the changes are reflected in
     * the returned HTML. Sections missing from the page contribute an empty
     * body.
     *
     * @param simple_html_dom $advert The SimpleHTMLDOM object for the advert page
     * @return string Processed HTML content
     */
    private function processAdvertContent($advert)
    {
        // Get the content sections
        $header = $advert->find('div.announcement-content-header', 0);
        $characteristics = $advert->find('div.announcement-characteristics', 0);
        $description = $advert->find('div.js-description', 0);
        $images = $advert->find('div.announcement__images', 0);

        // Remove all favorites divs
        foreach ($advert->find('div.announcement-meta__favorites') as $favorites) {
            $favorites->outertext = '';
        }

        // Replace all <a> tags with their text content
        foreach ($advert->find('a') as $a) {
            $a->outertext = $a->innertext;
        }

        // Sections are read only now, after the clean-up above.
        $sections = [
            $this->innerHtmlOf($header),
            '<h3>Details</h3>' . $this->innerHtmlOf($characteristics),
            '<h3>Description</h3>' . $this->innerHtmlOf($description),
            '<h3>Images</h3>' . $this->innerHtmlOf($images),
        ];

        return implode('<hr/>', $sections);
    }

    /**
     * Convert relative time strings like "Yesterday 12:32" to absolute date strings
     *
     * @param string $timeString The relative time string from the website
     * @return string "Y-m-d H:i:s" for "Yesterday"/"Today" inputs, otherwise the input unchanged
     */
    private function convertRelativeTime($timeString)
    {
        if (str_contains($timeString, 'Yesterday')) {
            // Replace "Yesterday" with actual date
            $time = str_replace('Yesterday', date('Y-m-d', strtotime('-1 day')), $timeString);
            return date('Y-m-d H:i:s', strtotime($time));
        }

        if (str_contains($timeString, 'Today')) {
            // Replace "Today" with actual date
            $time = str_replace('Today', date('Y-m-d'), $timeString);
            return date('Y-m-d H:i:s', strtotime($time));
        }

        // For other formats, return as is and let the consumer parse it
        return $timeString;
    }

    /**
     * Trimmed plain text of the first node matching $selector.
     *
     * @param simple_html_dom $dom
     * @param string $selector
     * @return string '' when nothing matches
     */
    private function plainTextOf($dom, string $selector): string
    {
        $node = $dom->find($selector, 0);

        return is_null($node) ? '' : trim($node->plaintext);
    }

    /**
     * Inner HTML of a node that may be absent.
     *
     * @param object|null $node
     * @return string '' when $node is null
     */
    private function innerHtmlOf($node): string
    {
        return is_null($node) ? '' : (string) $node->innertext;
    }
}
|
@ -8,48 +8,27 @@ class BinanceBridge extends BridgeAbstract
|
||||
const MAINTAINER = 'thefranke';
|
||||
const CACHE_TIMEOUT = 3600; // 1h
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'https://www.binance.com/bapi/composite/v1/public/content/blog/list?category=&tag=&page=1&size=12';
|
||||
$json = getContents($url);
|
||||
$data = Json::decode($json, false);
|
||||
foreach ($data->data->blogList as $post) {
|
||||
$item = [];
|
||||
$item['title'] = $post->title;
|
||||
// Url slug not in json
|
||||
//$item['uri'] = $uri;
|
||||
$item['timestamp'] = $post->postTimeUTC / 1000;
|
||||
$item['author'] = 'Binance';
|
||||
$item['content'] = $post->brief;
|
||||
//$item['categories'] = $category;
|
||||
$item['uid'] = $post->idStr;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://bin.bnbstatic.com/static/images/common/favicon.ico';
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI)
|
||||
or returnServerError('Could not fetch Binance blog data.');
|
||||
|
||||
$appData = $html->find('script[id="__APP_DATA"]');
|
||||
$appDataJson = json_decode($appData[0]->innertext);
|
||||
$allposts = $appDataJson->routeProps->f3ac->blogListRes->list;
|
||||
|
||||
foreach ($allposts as $element) {
|
||||
$date = $element->releasedTime;
|
||||
$title = $element->title;
|
||||
$category = $element->category->name;
|
||||
|
||||
$suburl = strtolower($category);
|
||||
$suburl = str_replace(' ', '_', $suburl);
|
||||
|
||||
$uri = self::URI . '/' . $suburl . '/' . $element->idStr;
|
||||
|
||||
$contentHTML = getSimpleHTMLDOMCached($uri);
|
||||
$contentAppData = $contentHTML->find('script[id="__APP_DATA"]');
|
||||
$contentAppDataJson = json_decode($contentAppData[0]->innertext);
|
||||
$content = $contentAppDataJson->routeProps->a106->blogDetail->content;
|
||||
|
||||
$item = [];
|
||||
$item['title'] = $title;
|
||||
$item['uri'] = $uri;
|
||||
$item['timestamp'] = substr($date, 0, -3);
|
||||
$item['author'] = 'Binance';
|
||||
$item['content'] = $content;
|
||||
$item['categories'] = $category;
|
||||
|
||||
$this->items[] = $item;
|
||||
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7,10 +7,14 @@ class BleepingComputerBridge extends FeedExpander
|
||||
const URI = 'https://www.bleepingcomputer.com/';
|
||||
const DESCRIPTION = 'Returns the newest articles.';
|
||||
|
||||
protected function parseItem($item)
|
||||
public function collectData()
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$article_html) {
|
||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
@ -23,10 +27,4 @@ class BleepingComputerBridge extends FeedExpander
|
||||
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
class BlizzardNewsBridge extends XPathAbstract
|
||||
class BlizzardNewsBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Blizzard News';
|
||||
const URI = 'https://news.blizzard.com';
|
||||
@ -35,26 +35,73 @@ class BlizzardNewsBridge extends XPathAbstract
|
||||
];
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
|
||||
const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article';
|
||||
const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2';
|
||||
const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]';
|
||||
const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href';
|
||||
const XPATH_EXPRESSION_ITEM_AUTHOR = '';
|
||||
const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp';
|
||||
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/div[@class="ArticleListItem-image"]/@style';
|
||||
const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="ArticleListItem-label"]';
|
||||
const SETTING_FIX_ENCODING = true;
|
||||
private const PRODUCT_IDS = [
|
||||
'blt525c436e4a1b0a97',
|
||||
'blt54fbd3787a705054',
|
||||
'blt2031aef34200656d',
|
||||
'blt795c314400d7ded9',
|
||||
'blt5cfc6affa3ca0638',
|
||||
'blt2e50e1521bb84dc6',
|
||||
'blt376fb94931906b6f',
|
||||
'blt81d46fcb05ab8811',
|
||||
'bltede2389c0a8885aa',
|
||||
'blt24859ba8086fb294',
|
||||
'blte27d02816a8ff3e1',
|
||||
'blt2caca37e42f19839',
|
||||
'blt90855744d00cd378',
|
||||
'bltec70ad0ea4fd6d1d',
|
||||
'blt500c1f8b5470bfdb'
|
||||
];
|
||||
|
||||
private const API_PATH = '/api/news/blizzard?';
|
||||
|
||||
/**
|
||||
* Source Web page URL (should provide either HTML or XML content)
|
||||
* @return string
|
||||
*/
|
||||
protected function getSourceUrl()
|
||||
private function getSourceUrl(): string
|
||||
{
|
||||
$locale = $this->getInput('locale');
|
||||
if ('zh-cn' === $locale) {
|
||||
return 'https://cn.news.blizzard.com';
|
||||
$baseUrl = 'https://cn.news.blizzard.com' . self::API_PATH;
|
||||
} else {
|
||||
$baseUrl = 'https://news.blizzard.com/' . $locale . self::API_PATH;
|
||||
}
|
||||
return 'https://news.blizzard.com/' . $locale;
|
||||
return $baseUrl .= http_build_query([
|
||||
'feedCxpProductIds' => self::PRODUCT_IDS
|
||||
]);
|
||||
}
|
||||
|
||||
/**
 * Fetch the news API response and convert each content item into a feed item.
 *
 * Optional properties (author, image, product title, update time) are
 * only emitted when the API provides them.
 *
 * @throws \Exception When the response does not contain a list of content items
 */
public function collectData()
{
    $feedContent = json_decode(getContents($this->getSourceUrl()), true);

    $entries = $feedContent['feed']['contentItems'] ?? null;
    if (!is_array($entries)) {
        throw new \Exception('Unexpected response from the Blizzard news API');
    }

    foreach ($entries as $entry) {
        $properties = $entry['properties'] ?? [];

        $item = [];

        $item['title'] = $this->filterChars($properties['title'] ?? '');
        $item['content'] = $this->filterChars($properties['summary'] ?? '');
        $item['uri'] = $properties['newsUrl'] ?? '';
        $item['author'] = $this->filterChars($properties['author'] ?? '');

        if (isset($properties['lastUpdated'])) {
            $item['timestamp'] = strtotime($properties['lastUpdated']);
        }
        if (isset($properties['staticAsset']['imageUrl'])) {
            $item['enclosures'] = [$properties['staticAsset']['imageUrl']];
        }
        if (isset($properties['cxpProduct']['title'])) {
            $item['categories'] = [$this->filterChars($properties['cxpProduct']['title'])];
        }

        $this->items[] = $item;
    }
}
|
||||
|
||||
/**
 * Escape the XML special characters `&`, `<` and `>` in a text value.
 *
 * Quotes are left untouched because only ENT_XML1 is passed as flag.
 *
 * @param mixed $content Value to escape; null is treated as an empty string
 * @return string
 */
private function filterChars($content)
{
    // Cast first: passing null to htmlspecialchars() is deprecated since PHP 8.1.
    return htmlspecialchars((string) $content, ENT_XML1);
}
|
||||
|
||||
/**
 * @return string URL of the favicon
 */
public function getIcon()
{
    $iconUrl = 'https://dfbmfbnnydoln.cloudfront.net/production/images/favicons/favicon.ba01bb119359d74970b02902472fd82e96b5aba7.ico';

    return $iconUrl;
}
|
||||
}
|
||||
|
670
bridges/BlueskyBridge.php
Normal file
670
bridges/BlueskyBridge.php
Normal file
@ -0,0 +1,670 @@
|
||||
<?php
|
||||
|
||||
class BlueskyBridge extends BridgeAbstract
|
||||
{
|
||||
//Initial PR by [RSSBridge contributors](https://github.com/RSS-Bridge/rss-bridge/issues/4058).
|
||||
//Modified from [©DIYgod and contributors at RSSHub](https://github.com/DIYgod/RSSHub/tree/master/lib/routes/bsky), MIT License';
|
||||
const NAME = 'Bluesky Bridge';
|
||||
const URI = 'https://bsky.app';
|
||||
const DESCRIPTION = 'Fetches posts from Bluesky';
|
||||
const MAINTAINER = 'mruac';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'data_source' => [
|
||||
'name' => 'Bluesky Data Source',
|
||||
'type' => 'list',
|
||||
'defaultValue' => 'Profile',
|
||||
'values' => [
|
||||
'Profile' => 'getAuthorFeed',
|
||||
],
|
||||
'title' => 'Select the type of data source to fetch from Bluesky.'
|
||||
],
|
||||
'user_id' => [
|
||||
'name' => 'User Handle or DID',
|
||||
'type' => 'text',
|
||||
'required' => true,
|
||||
'exampleValue' => 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'title' => 'ATProto / Bsky.app handle or DID'
|
||||
],
|
||||
'feed_filter' => [
|
||||
'name' => 'Feed type',
|
||||
'type' => 'list',
|
||||
'defaultValue' => 'posts_and_author_threads',
|
||||
'values' => [
|
||||
'Posts feed' => 'posts_and_author_threads',
|
||||
'All posts and replies' => 'posts_with_replies',
|
||||
'Root posts only' => 'posts_no_replies',
|
||||
'Media only' => 'posts_with_media',
|
||||
]
|
||||
],
|
||||
|
||||
'include_reposts' => [
|
||||
'name' => 'Include Reposts?',
|
||||
'type' => 'checkbox',
|
||||
'defaultValue' => 'checked'
|
||||
],
|
||||
|
||||
'include_reply_context' => [
|
||||
'name' => 'Include Reply context?',
|
||||
'type' => 'checkbox'
|
||||
],
|
||||
|
||||
'verbose_title' => [
|
||||
'name' => 'Use verbose feed item titles?',
|
||||
'type' => 'checkbox'
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
private $profile;
|
||||
|
||||
/**
 * Feed name: "Bluesky - <display name> (@<handle>)" once a profile is loaded.
 *
 * The handle part is omitted when the profile reports "handle.invalid".
 * A profile without display name yields an empty name part instead of an
 * undefined-index notice.
 *
 * @return string
 */
public function getName()
{
    if (!isset($this->profile)) {
        return parent::getName();
    }

    // The display name is optional in the author objects handled elsewhere
    // in this bridge, so it is treated as optional here as well.
    $displayName = $this->profile['displayName'] ?? '';

    if ($this->profile['handle'] === 'handle.invalid') {
        return sprintf('Bluesky - %s', $displayName);
    }

    return sprintf('Bluesky - %s (@%s)', $displayName, $this->profile['handle']);
}
|
||||
|
||||
/**
 * Profile page of the loaded profile, addressed by handle or, when the
 * handle is "handle.invalid", by DID. Without a profile the parent's URI
 * is returned.
 *
 * @return string
 */
public function getURI()
{
    if (!isset($this->profile)) {
        return parent::getURI();
    }

    $identifier = $this->profile['handle'] === 'handle.invalid'
        ? $this->profile['did']
        : $this->profile['handle'];

    return self::URI . '/profile/' . $identifier;
}
|
||||
|
||||
/**
 * Avatar of the loaded profile, or the parent's icon when no profile is loaded.
 */
public function getIcon()
{
    return isset($this->profile)
        ? $this->profile['avatar']
        : parent::getIcon();
}
|
||||
|
||||
/**
 * Description of the loaded profile, or the parent's description when no
 * profile is loaded.
 */
public function getDescription()
{
    return isset($this->profile)
        ? $this->profile['description']
        : parent::getDescription();
}
|
||||
|
||||
/**
 * Render an external embed (link card or Tenor GIF) as HTML.
 *
 * The external URI comes from the post record and is escaped before it is
 * placed into `href` / `src` attributes.
 *
 * @param array $external Embed data; reads `uri`, `title`, `description` and the optional `thumb`
 * @param string $did DID used to build the thumbnail URL
 * @return string HTML fragment
 */
private function parseExternal($external, $did)
{
    $externalUri = $external['uri'];
    $safeUri = htmlspecialchars($externalUri, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $externalTitle = e($external['title']);
    $externalDescription = e($external['description']);
    $thumb = $external['thumb'] ?? null;

    if (preg_match('/http(|s):\/\/media\.tenor\.com/', $externalUri)) {
        //tenor gif embed
        $tenorInterstitial = htmlspecialchars(
            str_replace('media.tenor.com', 'media1.tenor.com/m', $externalUri),
            ENT_QUOTES | ENT_SUBSTITUTE,
            'UTF-8'
        );

        return "<figure><a href=\"$tenorInterstitial\"><img src=\"$safeUri\"/></a><figcaption>$externalTitle</figcaption></figure>";
    }

    //link embed preview
    // parse_url() yields no host for relative or malformed URIs; the
    // "(host)" prefix is then left out instead of reading a missing key.
    $host = parse_url($externalUri, PHP_URL_HOST);
    $hostPrefix = is_string($host) && $host !== ''
        ? '(' . htmlspecialchars($host, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ') '
        : '';

    $thumbDesc = $thumb ? ('<img src="https://cdn.bsky.app/img/feed_thumbnail/plain/' . $did . '/' . $thumb['ref']['$link'] . '@jpeg"/>') : '';
    $caption = strlen($externalDescription) > 0 ? "<figcaption>$hostPrefix$externalDescription</figcaption>" : '';

    $description = '<br><blockquote><b><a href="' . $safeUri . '">' . $externalTitle . '</a></b>';
    $description .= '<figure>' . $thumbDesc . $caption . '</figure></blockquote>';

    return $description;
}
|
||||
|
||||
/**
 * Convert the text of a post record into HTML, turning link facets into anchors.
 *
 * The text is cut at the byte offsets given by the link facets. Each piece
 * is escaped on its own, so a link is placed exactly on its facet range,
 * including when the linked text contains characters that need escaping.
 * Overlapping or out-of-range facets are ignored.
 *
 * @param array $record Post record, or a wrapper holding the record under `value`
 * @return string HTML with line breaks converted to <br>
 */
private function textToDescription($record)
{
    if (isset($record['value'])) {
        $record = $record['value'];
    }

    $raw = (string) ($record['text'] ?? '');
    $length = strlen($raw);

    // Collect link ranges (byte offsets into the raw text).
    $links = [];
    foreach ($record['facets'] ?? [] as $facet) {
        if (($facet['features'][0]['$type'] ?? null) !== 'app.bsky.richtext.facet#link') {
            continue;
        }
        $links[] = [
            'start' => (int) ($facet['index']['byteStart'] ?? 0),
            'end' => (int) ($facet['index']['byteEnd'] ?? 0),
            'uri' => (string) ($facet['features'][0]['uri'] ?? ''),
        ];
    }
    usort($links, static function ($a, $b) {
        return $a['start'] <=> $b['start'];
    });

    $html = '';
    $cursor = 0;
    foreach ($links as $link) {
        // Skip empty, overlapping or out-of-range facets.
        if ($link['start'] < $cursor || $link['end'] <= $link['start'] || $link['end'] > $length) {
            continue;
        }

        $html .= nl2br(e(substr($raw, $cursor, $link['start'] - $cursor)));
        $html .= '<a href="' . htmlspecialchars($link['uri'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '">'
            . nl2br(e(substr($raw, $link['start'], $link['end'] - $link['start'])))
            . '</a>';
        $cursor = $link['end'];
    }

    // Remaining text after the last link (or the whole text without links).
    $html .= nl2br(e(substr($raw, $cursor)));

    return $html;
}
|
||||
|
||||
/**
 * Builds one feed item per entry of the requested account's author feed.
 *
 * The 'user_id' input may be a handle or a did:plc identifier. Handles are
 * resolved to a DID first; anything else is reported through
 * returnClientError().
 */
public function collectData()
{
    $user_id = $this->getInput('user_id');
    //captures the TLD in $handle_res[1]
    $handle_match = preg_match('/(?:[a-zA-Z]*\.)+([a-zA-Z](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/', $user_id, $handle_res);
    //https://github.com/did-method-plc/did-method-plc#identifier-syntax
    $did_match = preg_match('/did:plc:[a-z2-7]{24}/', $user_id);
    //https://en.wikipedia.org/wiki/Top-level_domain#Reserved_domains
    $exclude = ['alt', 'arpa', 'example', 'internal', 'invalid', 'local', 'localhost', 'onion'];

    // in_array() with strict comparison: array_search() returns index 0 for
    // 'alt', which a loose "== false" check mistakes for "not found".
    if ($handle_match == true && !in_array($handle_res[1], $exclude, true)) {
        //valid bsky handle
        $did = $this->resolveHandle($user_id);
    } elseif ($did_match == true) {
        //valid DID
        $did = $user_id;
    } else {
        // NOTE(review): $did stays undefined past this point, so this call is
        // presumably expected to abort the request - confirm.
        returnClientError('Invalid ATproto handle or DID provided.');
    }

    $filter = $this->getInput('feed_filter') ?: 'posts_and_author_threads';
    $replyContext = $this->getInput('include_reply_context');

    $this->profile = $this->getProfile($did);
    $authorFeed = $this->getAuthorFeed($did, $filter);

    foreach ($authorFeed['feed'] as $post) {
        $postView = $post['post'];
        $postRecord = $postView['record'];

        $item = [];
        $item['uri'] = self::URI . '/profile/' . $this->fallbackAuthor($postView['author'], 'url') . '/post/' . explode('app.bsky.feed.post/', $postView['uri'])[1];
        $item['title'] = $this->getInput('verbose_title') ? $this->generateVerboseTitle($post) : strtok($postRecord['text'], "\n");
        $item['timestamp'] = strtotime($postRecord['createdAt']);
        $item['author'] = $this->fallbackAuthor($postView['author'], 'display');

        if (Debug::isEnabled()) {
            $url = explode('/', $postView['uri']);
            $this->logger->debug('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]);
        }

        //post
        $description = $this->getPostViewDescription($postView, 'post');

        //reply: skipped for deleted or blocked parents, which are not rendered as a normal post
        if (
            $replyContext &&
            isset($post['reply']) &&
            !isset($post['reply']['parent']['notFound']) &&
            !isset($post['reply']['parent']['blocked'])
        ) {
            $description .= '<hr/>';
            $description .= $this->getPostViewDescription($post['reply']['parent'], 'reply');
        }

        $item['content'] = $description;
        $this->items[] = $item;
    }
}

/**
 * Renders a post (or reply parent): header, text, media embeds and the quoted record.
 *
 * @param array $postView Array holding 'uri', 'author', 'record' and optionally 'embed'.
 * @param string $type 'post' or 'reply'; forwarded to getPostDescription().
 */
private function getPostViewDescription(array $postView, string $type): string
{
    $record = $postView['record'];
    $author = $postView['author'];

    $authorHandle = $author['handle'] !== 'handle.invalid' ? '<i>@' . $author['handle'] . '</i> ' : '';
    $displayName = e($author['displayName'] ?? '');
    $postUri = self::URI . '/profile/' . $this->fallbackAuthor($author, 'url') . '/post/' . explode('app.bsky.feed.post/', $postView['uri'])[1];

    $description = '<p>';
    $description .= $this->getPostDescription($displayName, $authorHandle, $postUri, $record, $type);
    if (isset($record['embed']['$type'])) {
        $description .= $this->getRecordEmbedDescription($record['embed'], $postView['embed'] ?? [], $author['did']);
    }
    $description .= '</p>';

    //quote post
    $description .= $this->getQuoteDescription($record, $postView);

    return $description;
}

/**
 * Renders the link preview, images and video attached to a post record.
 *
 * @param array $embed The record's 'embed' entry (must contain '$type').
 * @param array $embedView The matching 'embed' entry of the post view; images are read from here.
 * @param string $authorDid DID of the post author, passed to the link and video renderers.
 */
private function getRecordEmbedDescription(array $embed, array $embedView, string $authorDid): string
{
    // For recordWithMedia the actual media sits one level deeper, under 'media'.
    $media = $embed['$type'] === 'app.bsky.embed.recordWithMedia' ? $embed['media'] : $embed;
    $mediaType = $media['$type'];
    $description = '';

    //post link embed
    if ($mediaType === 'app.bsky.embed.external') {
        $description .= $this->parseExternal($media['external'], $authorDid);
    }

    //post images
    if ($mediaType === 'app.bsky.embed.images') {
        $images = $embedView['images'] ?? $embedView['media']['images'] ?? [];
        foreach ($images as $image) {
            $description .= $this->getPostImageDescription($image);
        }
    }

    //post video
    if ($mediaType === 'app.bsky.embed.video') {
        $description .= $this->getPostVideoDescription($media['video'], $authorDid);
    }

    return $description;
}

/**
 * Renders the record quoted by a post, wrapped in its own paragraph.
 *
 * @param array $record The quoting post's record.
 * @param array $postView The quoting post's view; the quoted record is read from its 'embed'.
 * @return string Empty string when the post does not quote anything.
 */
private function getQuoteDescription(array $record, array $postView): string
{
    $embedType = $record['embed']['$type'] ?? '';
    $isQuote = $embedType === 'app.bsky.embed.record' || $embedType === 'app.bsky.embed.recordWithMedia';
    if (!$isQuote || !isset($postView['embed']['record'])) {
        return '';
    }

    $embedRecord = $postView['embed']['record'];
    // Use the nested 'record' entry when there is one, otherwise the entry itself.
    $quotedRecord = $embedRecord['record'] ?? $embedRecord;
    $quotedType = $quotedRecord['$type'] ?? '';

    $description = '<p>';
    if (!empty($quotedRecord['notFound'])) { //deleted post
        $description .= 'Quoted post deleted.';
    } elseif (!empty($quotedRecord['detached'])) { //detached quote
        $uri_explode = explode('/', $quotedRecord['uri']);
        $uri_reconstructed = self::URI . '/profile/' . $uri_explode[2] . '/post/' . $uri_explode[4];
        $description .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
    } elseif (!empty($quotedRecord['blocked'])) { //blocked by quote author
        $description .= 'Author of quoted post has blocked OP.';
    } elseif (
        $quotedType === 'app.bsky.feed.defs#generatorView' ||
        $quotedType === 'app.bsky.graph.defs#listView'
    ) {
        $description .= $this->getListFeedDescription($quotedRecord);
    } elseif (
        $quotedType === 'app.bsky.graph.starterpack' ||
        $quotedType === 'app.bsky.graph.defs#starterPackViewBasic'
    ) {
        $description .= $this->getStarterPackDescription($embedRecord);
    } else {
        $description .= $this->getQuotedPostDescription($quotedRecord);
    }
    $description .= '</p>';

    return $description;
}

/**
 * Renders a quoted post: header, text, link preview, video and images.
 *
 * @param array $quotedRecord Quoted record view with 'author', 'uri', 'value' and optionally 'embeds'.
 */
private function getQuotedPostDescription(array $quotedRecord): string
{
    $author = $quotedRecord['author'];
    $quotedAuthorDid = $author['did'];
    $quotedDisplayName = e($author['displayName'] ?? '');
    $quotedAuthorHandle = $author['handle'] !== 'handle.invalid' ? '<i>@' . $author['handle'] . '</i>' : '';

    $parts = explode('/', $quotedRecord['uri']);
    $quotedPostUri = self::URI . '/profile/' . $this->fallbackAuthor($author, 'url') . '/post/' . end($parts);

    //quoted post - post
    $description = $this->getPostDescription($quotedDisplayName, $quotedAuthorHandle, $quotedPostUri, $quotedRecord, 'quote');

    if (!isset($quotedRecord['value']['embed']['$type'])) {
        return $description;
    }
    $embed = $quotedRecord['value']['embed'];
    $embedType = $embed['$type'];
    $mediaType = $embedType === 'app.bsky.embed.recordWithMedia' ? $embed['media']['$type'] : null;

    //quoted post - post link embed
    if ($embedType === 'app.bsky.embed.external') {
        $description .= $this->parseExternal($embed['external'], $quotedAuthorDid);
    }

    //quoted post - post video
    if ($embedType === 'app.bsky.embed.video' || $mediaType === 'app.bsky.embed.video') {
        $description .= $this->getPostVideoDescription($embed['video'] ?? $embed['media']['video'], $quotedAuthorDid);
    }

    //quoted post - post images (read from the views listed under 'embeds')
    if ($embedType === 'app.bsky.embed.images' || $mediaType === 'app.bsky.embed.images') {
        foreach ($quotedRecord['embeds'] as $embedView) {
            $hasImages = $embedView['$type'] === 'app.bsky.embed.images#view' ||
                ($embedView['$type'] === 'app.bsky.embed.recordWithMedia#view' && $embedView['media']['$type'] === 'app.bsky.embed.images#view');
            if (!$hasImages) {
                continue;
            }
            $images = $embedView['images'] ?? $embedView['media']['images'];
            foreach ($images as $image) {
                $description .= $this->getPostImageDescription($image);
            }
        }
    }

    return $description;
}
|
||||
|
||||
/**
 * Builds a <figure> containing a <video> element for a video blob.
 *
 * @param array $video Blob description; 'ref'['$link'] (the CID) and 'mimeType' are read.
 * @param string $authorDID DID of the blob's owner, used in both the poster and the blob URL.
 * @return string HTML snippet.
 */
private function getPostVideoDescription(array $video, $authorDID)
{
    $videoCID = $video['ref']['$link'];
    $videoMime = $video['mimeType'];

    //https://video.bsky.app/watch/$did/$cid/thumbnail.jpg
    // The former "?? ''" fallback was dropped: an interpolated string is never null.
    $thumbnail = "poster=\"https://video.bsky.app/watch/$authorDID/$videoCID/thumbnail.jpg\"";
    $videoURL = "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=$authorDID&cid=$videoCID";

    return "<figure><video loop $thumbnail controls src=\"$videoURL\" type=\"$videoMime\"/></figure>";
}
|
||||
|
||||
/**
 * Builds a <figure> showing an image thumbnail that links to the full-size image.
 *
 * @param array $image Image view; 'thumb', 'fullsize' and 'alt' are read.
 * @return string HTML snippet; a <figcaption> is added only for non-empty alt text.
 */
private function getPostImageDescription(array $image)
{
    $thumb = $image['thumb'];
    $full = $image['fullsize'];

    $caption = '';
    if (strlen($image['alt']) > 0) {
        $caption = '<figcaption>' . e($image['alt']) . '</figcaption>';
    }

    return '<figure><a href="' . $full . '"><img src="' . $thumb . '"></a>' . $caption . '</figure>';
}
|
||||
|
||||
/**
 * Builds the header line of a post followed by its text as HTML.
 *
 * @param string $postDisplayName Author display name, inserted as-is.
 * @param string $postAuthorHandle Author handle markup, inserted as-is.
 * @param string $postUri Link target for the post.
 * @param array $postRecord Record passed on to textToDescription().
 * @param string $type 'quote', 'reply', or anything else for a plain post.
 */
private function getPostDescription(
    string $postDisplayName,
    string $postAuthorHandle,
    string $postUri,
    array $postRecord,
    string $type
) {
    if ($type === 'quote' || $type === 'reply') {
        // A record carrying a 'reply' entry is itself a reply, otherwise a post.
        $postType = isset($postRecord['reply']) ? 'reply' : 'post';
        if ($type === 'quote') {
            // Quoted post/reply from bbb @bbb.com:
            $header = "<a href=\"$postUri\">Quoted $postType</a> from <b>$postDisplayName</b> $postAuthorHandle:<br>";
        } else {
            // Replying to aaa @aaa.com's post/reply:
            $header = "Replying to <b>$postDisplayName</b> $postAuthorHandle's <a href=\"$postUri\">$postType</a>:<br>";
        }
    } else {
        // aaa @aaa.com posted:
        $header = "<b>$postDisplayName</b> $postAuthorHandle <a href=\"$postUri\">posted</a>:<br>";
    }

    return $header . $this->textToDescription($postRecord);
}
|
||||
|
||||
//used if handle verification fails; falls back to displayName or DID depending on context.
|
||||
/**
 * Returns the author's handle, or a substitute when the handle is 'handle.invalid'.
 *
 * @param array $author Author data; 'handle' is always read, 'did' / 'displayName' on fallback.
 * @param string $reason 'url' yields the DID, 'display' yields the escaped display name.
 * @return string The handle for any other $reason, even when it is 'handle.invalid'.
 */
private function fallbackAuthor($author, $reason)
{
    if ($author['handle'] !== 'handle.invalid') {
        return $author['handle'];
    }

    // Loose comparisons on purpose: they mirror the semantics of a switch statement.
    if ($reason == 'url') {
        return $author['did'];
    }
    if ($reason == 'display') {
        return e($author['displayName'] ?? '');
    }

    return $author['handle'];
}
|
||||
|
||||
/**
 * Builds a title of the form "Post by A, replying to B, quoting C".
 *
 * Used instead of the post contents when a verbose title is requested.
 *
 * @param array $post Feed entry; 'post', and optionally 'reason' and 'reply', are read.
 * @return string The assembled title.
 */
private function generateVerboseTitle($post)
{
    $postAuthor = $this->fallbackAuthor($post['post']['author'], 'display');
    if (isset($post['reason']) && str_contains($post['reason']['$type'], 'reasonRepost')) {
        $title = 'Repost by ' . $this->fallbackAuthor($post['reason']['by'], 'display') . ', post by ' . $postAuthor;
    } else {
        $title = 'Post by ' . $postAuthor;
    }

    if (isset($post['reply'])) {
        $parent = $post['reply']['parent'];
        if (isset($parent['blocked'])) {
            $replyAuthor = 'blocked user';
        } elseif (isset($parent['notFound'])) {
            $replyAuthor = 'deleted post';
        } else {
            $replyAuthor = $this->fallbackAuthor($parent['author'], 'display');
        }
        $title .= ', replying to ' . $replyAuthor;
    }

    //starter packs, feeds and lists are not quoted posts, so they get no "quoting" suffix
    $nonPostTypes = [
        'app.bsky.feed.defs#generatorView',
        'app.bsky.graph.defs#listView',
        'app.bsky.graph.defs#starterPackViewBasic',
    ];
    $embedRecord = $post['post']['embed']['record'] ?? null;
    if ($embedRecord !== null && !in_array($embedRecord['$type'] ?? '', $nonPostTypes, true)) {
        if (isset($embedRecord['blocked'])) {
            $quotedAuthor = 'blocked user';
        } elseif (isset($embedRecord['notFound'])) {
            // Fixed typo: this used to read 'deleted psost'.
            $quotedAuthor = 'deleted post';
        } elseif (isset($embedRecord['detached'])) {
            $quotedAuthor = 'detached post';
        } else {
            $quotedAuthor = $this->fallbackAuthor($embedRecord['record']['author'] ?? $embedRecord['author'], 'display');
        }
        $title .= ', quoting ' . $quotedAuthor;
    }

    return $title;
}
|
||||
|
||||
/**
 * Looks up the DID for a handle through the public resolveHandle endpoint.
 *
 * @param string $handle Handle to resolve; URL-encoded before being sent.
 * @return mixed The 'did' entry of the decoded JSON response.
 */
private function resolveHandle($handle)
{
    $endpoint = 'https://public.api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=';
    $decoded = json_decode(getContents($endpoint . urlencode($handle)), true);

    return $decoded['did'];
}
|
||||
|
||||
/**
 * Fetches the profile of an actor from the public getProfile endpoint.
 *
 * @param string $did Actor identifier; URL-encoded before being sent.
 * @return mixed The JSON response decoded into associative arrays.
 */
private function getProfile($did)
{
    $endpoint = 'https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=';

    return json_decode(getContents($endpoint . urlencode($did)), true);
}
|
||||
|
||||
/**
 * Fetches up to 30 entries of an actor's feed from the public getAuthorFeed endpoint.
 *
 * @param string $did Actor identifier; URL-encoded before being sent.
 * @param string $filter Value of the 'filter' query parameter; URL-encoded before being sent.
 * @return mixed The JSON response decoded into associative arrays.
 */
private function getAuthorFeed($did, $filter)
{
    $uri = sprintf(
        'https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=%s&filter=%s&limit=30',
        urlencode($did),
        urlencode($filter)
    );

    // The request URL is only logged in debug mode.
    if (Debug::isEnabled()) {
        $this->logger->debug($uri);
    }

    return json_decode(getContents($uri), true);
}
|
||||
|
||||
//Embed for generated feeds and lists
|
||||
/**
 * Builds the HTML block shown when a post embeds a feed or a list.
 *
 * @param array $record Feed or list view; 'uri' and 'creator' are read, while
 *                      'avatar', 'displayName' / 'name', 'description',
 *                      'likeCount' and 'purpose' are used when present.
 * @return string HTML blockquote.
 */
private function getListFeedDescription(array $record): string
{
    $feedViewAvatar = isset($record['avatar']) ? '<img src="' . preg_replace('/\/img\/avatar\//', '/img/avatar_thumbnail/', $record['avatar']) . '">' : '';
    $feedViewName = e($record['displayName'] ?? $record['name']);
    $feedViewDescription = e($record['description'] ?? '');
    // Fall back to an empty string when the creator has no display name, as done elsewhere in this class.
    $authorDisplayName = e($record['creator']['displayName'] ?? '');
    $authorHandle = e($record['creator']['handle']);
    $likeCount = isset($record['likeCount']) ? '<br>Liked by ' . e($record['likeCount']) . ' users' : '';

    // The last path segment of the record URI is reused in the bsky.app link.
    preg_match('/\/([^\/]+)$/', $record['uri'], $matches);

    $purpose = $record['purpose'] ?? '';
    if ($purpose === 'app.bsky.graph.defs#modlist') {
        $typeURL = '/lists/';
        $typeDesc = 'moderation list';
    } elseif ($purpose === 'app.bsky.graph.defs#curatelist') {
        $typeURL = '/lists/';
        $typeDesc = 'list';
    } else {
        $typeURL = '/feed/';
        $typeDesc = 'feed';
    }
    $uri = e('https://bsky.app/profile/' . $record['creator']['did'] . $typeURL . $matches[1]);

    return <<<END
<blockquote>
<b><a href="{$uri}">{$feedViewName}</a></b><br/>
Bluesky {$typeDesc} by <b>{$authorDisplayName}</b> <i>@{$authorHandle}</i>
<figure>
{$feedViewAvatar}
<figcaption>{$feedViewDescription}{$likeCount}</figcaption>
</figure>
</blockquote>
END;
}
|
||||
|
||||
/**
 * Builds the HTML block shown when a post embeds a starter pack.
 *
 * @param array $record Starter pack view; 'record' ('name', 'list' and
 *                      optionally 'description') and 'creator' are read.
 * @return string HTML blockquote, or a plain failure message when 'record' is missing.
 */
private function getStarterPackDescription(array $record): string
{
    if (!isset($record['record'])) {
        return 'Failed to get starter pack information.';
    }
    $starterpackRecord = $record['record'];
    $starterpackName = e($starterpackRecord['name']);
    // Description and display name fall back to '' when missing, like getListFeedDescription() does.
    $starterpackDescription = e($starterpackRecord['description'] ?? '');
    $creatorDisplayName = e($record['creator']['displayName'] ?? '');
    $creatorHandle = e($record['creator']['handle']);

    // The last path segment of the list URI is reused in the bsky.app link.
    preg_match('/\/([^\/]+)$/', $starterpackRecord['list'], $matches);
    $uri = e('https://bsky.app/starter-pack/' . $record['creator']['did'] . '/' . $matches[1]);

    return <<<END
<blockquote>
<b><a href="{$uri}">{$starterpackName}</a></b><br/>
Bluesky starter pack by <b>{$creatorDisplayName}</b> <i>@{$creatorHandle}</i><br/>
{$starterpackDescription}
</blockquote>
END;
}
|
||||
}
|
218
bridges/BodaccBridge.php
Normal file
218
bridges/BodaccBridge.php
Normal file
@ -0,0 +1,218 @@
|
||||
<?php
|
||||
|
||||
class BodaccBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'BODACC';
|
||||
const URI = 'https://bodacc-datadila.opendatasoft.com/';
|
||||
const DESCRIPTION = 'Fetches announces from the French Government "Bulletin Officiel Des Annonces Civiles et Commerciales".';
|
||||
const CACHE_TIMEOUT = 86400;
|
||||
const MAINTAINER = 'quent1';
|
||||
const PARAMETERS = [
|
||||
'Annonces commerciales' => [
|
||||
'departement' => [
|
||||
'name' => 'Département',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Tous' => null,
|
||||
'Ain' => '01',
|
||||
'Aisne' => '02',
|
||||
'Allier' => '03',
|
||||
'Alpes-de-Haute-Provence' => '04',
|
||||
'Hautes-Alpes' => '05',
|
||||
'Alpes-Maritimes' => '06',
|
||||
'Ardèche' => '07',
|
||||
'Ardennes' => '08',
|
||||
'Ariège' => '09',
|
||||
'Aube' => '10',
|
||||
'Aude' => '11',
|
||||
'Aveyron' => '12',
|
||||
'Bouches-du-Rhône' => '13',
|
||||
'Calvados' => '14',
|
||||
'Cantal' => '15',
|
||||
'Charente' => '16',
|
||||
'Charente-Maritime' => '17',
|
||||
'Cher' => '18',
|
||||
'Corrèze' => '19',
|
||||
'Corse-du-Sud' => '2A',
|
||||
'Haute-Corse' => '2B',
|
||||
'Côte-d\'Or' => '21',
|
||||
'Côtes-d\'Armor' => '22',
|
||||
'Creuse' => '23',
|
||||
'Dordogne' => '24',
|
||||
'Doubs' => '25',
|
||||
'Drôme' => '26',
|
||||
'Eure' => '27',
|
||||
'Eure-et-Loir' => '28',
|
||||
'Finistère' => '29',
|
||||
'Gard' => '30',
|
||||
'Haute-Garonne' => '31',
|
||||
'Gers' => '32',
|
||||
'Gironde' => '33',
|
||||
'Hérault' => '34',
|
||||
'Ille-et-Vilaine' => '35',
|
||||
'Indre' => '36',
|
||||
'Indre-et-Loire' => '37',
|
||||
'Isère' => '38',
|
||||
'Jura' => '39',
|
||||
'Landes' => '40',
|
||||
'Loir-et-Cher' => '41',
|
||||
'Loire' => '42',
|
||||
'Haute-Loire' => '43',
|
||||
'Loire-Atlantique' => '44',
|
||||
'Loiret' => '45',
|
||||
'Lot' => '46',
|
||||
'Lot-et-Garonne' => '47',
|
||||
'Lozère' => '48',
|
||||
'Maine-et-Loire' => '49',
|
||||
'Manche' => '50',
|
||||
'Marne' => '51',
|
||||
'Haute-Marne' => '52',
|
||||
'Mayenne' => '53',
|
||||
'Meurthe-et-Moselle' => '54',
|
||||
'Meuse' => '55',
|
||||
'Morbihan' => '56',
|
||||
'Moselle' => '57',
|
||||
'Nièvre' => '58',
|
||||
'Nord' => '59',
|
||||
'Oise' => '60',
|
||||
'Orne' => '61',
|
||||
'Pas-de-Calais' => '62',
|
||||
'Puy-de-Dôme' => '63',
|
||||
'Pyrénées-Atlantiques' => '64',
|
||||
'Hautes-Pyrénées' => '65',
|
||||
'Pyrénées-Orientales' => '66',
|
||||
'Bas-Rhin' => '67',
|
||||
'Haut-Rhin' => '68',
|
||||
'Rhône' => '69',
|
||||
'Haute-Saône' => '70',
|
||||
'Saône-et-Loire' => '71',
|
||||
'Sarthe' => '72',
|
||||
'Savoie' => '73',
|
||||
'Haute-Savoie' => '74',
|
||||
'Paris' => '75',
|
||||
'Seine-Maritime' => '76',
|
||||
'Seine-et-Marne' => '77',
|
||||
'Yvelines' => '78',
|
||||
'Deux-Sèvres' => '79',
|
||||
'Somme' => '80',
|
||||
'Tarn' => '81',
|
||||
'Tarn-et-Garonne' => '82',
|
||||
'Var' => '83',
|
||||
'Vaucluse' => '84',
|
||||
'Vendée' => '85',
|
||||
'Vienne' => '86',
|
||||
'Haute-Vienne' => '87',
|
||||
'Vosges' => '88',
|
||||
'Yonne' => '89',
|
||||
'Territoire de Belfort' => '90',
|
||||
'Essonne' => '91',
|
||||
'Hauts-de-Seine' => '92',
|
||||
'Seine-Saint-Denis' => '93',
|
||||
'Val-de-Marne' => '94',
|
||||
'Val-d\'Oise' => '95',
|
||||
'Guadeloupe' => '971',
|
||||
'Martinique' => '972',
|
||||
'Guyane' => '973',
|
||||
'La Réunion' => '974',
|
||||
'Saint-Pierre-et-Miquelon' => '975',
|
||||
'Mayotte' => '976',
|
||||
'Saint-Barthélemy' => '977',
|
||||
'Saint-Martin' => '978',
|
||||
'Terres australes et antarctiques françaises' => '984',
|
||||
'Wallis-et-Futuna' => '986',
|
||||
'Polynésie française' => '987',
|
||||
'Nouvelle-Calédonie' => '988',
|
||||
'Île de Clipperton' => '989'
|
||||
]
|
||||
],
|
||||
'famille' => [
|
||||
'name' => 'Famille',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Toutes' => null,
|
||||
'Annonces diverses' => 'divers',
|
||||
'Créations' => 'creation',
|
||||
'Dépôts des comptes' => 'dpc',
|
||||
'Immatriculations' => 'immatriculation',
|
||||
'Modifications diverses' => 'modification',
|
||||
'Procédures collectives' => 'collective',
|
||||
'Procédures de conciliation' => 'conciliation',
|
||||
'Procédures de rétablissement professionnel' => 'retablissement_professionnel',
|
||||
'Radiations' => 'radiation',
|
||||
'Ventes et cessions' => 'vente'
|
||||
]
|
||||
],
|
||||
'type' => [
|
||||
'name' => 'Type',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Tous' => null,
|
||||
'Avis initial' => 'annonce',
|
||||
'Avis d\'annulation' => 'annulation',
|
||||
'Avis rectificatif' => 'rectificatif'
|
||||
]
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Queries the "annonces-commerciales" dataset and adds one item per usable record.
 *
 * The three list inputs become optional conditions that are joined with
 * " and " into the "where" query parameter. Records missing any of the
 * selected fields are skipped.
 */
public function collectData()
{
    // API column => bridge input name
    $filters = [
        'numerodepartement' => 'departement',
        'familleavis' => 'famille',
        'typeavis' => 'type',
    ];
    $where = [];
    foreach ($filters as $column => $inputName) {
        $value = $this->getInput($inputName);
        if (!empty($value)) {
            $where[] = $column . '="' . $value . '"';
        }
    }

    $query = [
        'select' => 'id,dateparution,typeavis_lib,familleavis_lib,commercant,ville,cp',
        'order_by' => 'id desc',
        'limit' => 50,
    ];
    if ($where !== []) {
        $query['where'] = implode(' and ', $where);
    }

    $path = '/api/explore/v2.1/catalog/datasets/annonces-commerciales/records?' . http_build_query($query);
    $data = Json::decode(getContents(urljoin(self::URI, $path)), false);

    $requiredFields = ['id', 'dateparution', 'typeavis_lib', 'familleavis_lib', 'commercant', 'ville', 'cp'];
    foreach ($data->results as $result) {
        foreach ($requiredFields as $field) {
            if (!isset($result->$field)) {
                // Incomplete record: move on to the next result.
                continue 2;
            }
        }

        $this->items[] = [
            'uid' => $result->id,
            'timestamp' => strtotime($result->dateparution),
            'title' => sprintf(
                '[%s] %s - %s à %s (%s)',
                $result->typeavis_lib,
                $result->familleavis_lib,
                $result->commercant,
                $result->ville,
                $result->cp
            ),
        ];
    }
}
|
||||
}
|
@ -1218,14 +1218,15 @@ EOT;
|
||||
$table = $this->generateEventDetailsTable($event);
|
||||
|
||||
$imgsrc = $event['BannerURL'];
|
||||
$FShareURL = $event['FShareURL'];
|
||||
|
||||
return <<<EOT
|
||||
<img title="Event Banner URL" src="$imgsrc"></img>
|
||||
<br>
|
||||
$table
|
||||
<br>
|
||||
More Details are available on the <a href="${event['FShareURL']}">BookMyShow website</a>.
|
||||
EOT;
|
||||
<img title="Event Banner URL" src="$imgsrc">
|
||||
<br>
|
||||
$table
|
||||
<br>
|
||||
More Details are available on the <a href="$FShareURL">BookMyShow website</a>.
|
||||
EOT;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1292,14 +1293,15 @@ EOT;
|
||||
|
||||
$synopsis = preg_replace(self::SYNOPSIS_REGEX, '', $data['EventSynopsis']);
|
||||
|
||||
$eventTrailerURL = $data['EventTrailerURL'];
|
||||
return <<<EOT
|
||||
<img title="Movie Poster" src="$imgsrc"></img>
|
||||
<div>$table</div>
|
||||
<p>$innerHtml</p>
|
||||
<p>${synopsis}</p>
|
||||
More Details are available on the <a href="$url">BookMyShow website</a> and a trailer is available
|
||||
<a href="${data['EventTrailerURL']}" title="Trailer URL">here</a>
|
||||
EOT;
|
||||
<img title="Movie Poster" src="$imgsrc"></img>
|
||||
<div>$table</div>
|
||||
<p>$innerHtml</p>
|
||||
<p>$synopsis</p>
|
||||
More Details are available on the <a href="$url">BookMyShow website</a> and a trailer is available
|
||||
<a href="$eventTrailerURL" title="Trailer URL">here</a>
|
||||
EOT;
|
||||
}
|
||||
|
||||
/**
|
||||
|
63
bridges/BruegelBridge.php
Normal file
63
bridges/BruegelBridge.php
Normal file
@ -0,0 +1,63 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge for bruegel.org listing pages (publications or commentary).
 */
class BruegelBridge extends BridgeAbstract
{
    const NAME = 'Bruegel';
    const URI = 'https://www.bruegel.org';
    const DESCRIPTION = 'European think-tank commentary and publications.';
    const MAINTAINER = 'KappaPrajd';
    const PARAMETERS = [
        [
            'category' => [
                'name' => 'Category',
                'type' => 'list',
                'defaultValue' => '/publications',
                'values' => [
                    'Publications' => '/publications',
                    'Commentary' => '/commentary',
                ],
            ],
        ],
    ];

    /**
     * Returns the URL of the site's 72x72 favicon.
     */
    public function getIcon()
    {
        return self::URI . '/themes/custom/bruegel/assets/favicon/android-icon-72x72.png';
    }

    /**
     * Loads the selected category page and turns each listed article into an item.
     */
    public function collectData()
    {
        $page = getSimpleHTMLDOM(self::URI . $this->getInput('category'));

        foreach ($page->find('.c-listing__content article') as $entry) {
            $this->items[] = [
                'title' => $entry->find('.c-list-item__title a span', 0)->plaintext,
                'content' => trim($entry->find('.c-list-item__description', 0)->plaintext),
                'timestamp' => strtotime($entry->find('.c-list-item__date', 0)->plaintext),
                // The href is appended to the site root as-is.
                'uri' => self::URI . $entry->find('.c-list-item__title a', 0)->getAttribute('href'),
                'author' => $this->getAuthor($entry),
            ];
        }
    }

    /**
     * Joins the names of all author links of an article with ", ".
     */
    private function getAuthor($article)
    {
        $names = [];
        foreach ($article->find('.c-list-item__authors a') as $link) {
            $names[] = $link->plaintext;
        }

        return implode(', ', $names);
    }
}
|
@ -38,50 +38,20 @@ class BrutBridge extends BridgeAbstract
|
||||
]
|
||||
];
|
||||
|
||||
const CACHE_TIMEOUT = 1800; // 30 mins
|
||||
|
||||
private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/';
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->getURI());
|
||||
|
||||
$results = $html->find('div.results', 0);
|
||||
|
||||
foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) {
|
||||
$item = [];
|
||||
|
||||
$videoPath = self::URI . $li->children(0)->href;
|
||||
$videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600);
|
||||
|
||||
$json = $this->extractJson($videoPageHtml);
|
||||
$id = array_keys((array) $json->media->index)[0];
|
||||
|
||||
$item['uri'] = $videoPath;
|
||||
$item['title'] = $json->media->index->$id->title;
|
||||
$item['timestamp'] = $json->media->index->$id->published_at;
|
||||
$item['enclosures'][] = $json->media->index->$id->media->thumbnail;
|
||||
|
||||
$description = $json->media->index->$id->description;
|
||||
$article = '';
|
||||
|
||||
if (is_null($json->media->index->$id->media->seo_article) === false) {
|
||||
$article = markdownToHtml($json->media->index->$id->media->seo_article);
|
||||
}
|
||||
|
||||
$item['content'] = <<<EOD
|
||||
<video controls poster="{$json->media->index->$id->media->thumbnail}" preload="none">
|
||||
<source src="{$json->media->index->$id->media->mp4_url}" type="video/mp4">
|
||||
</video>
|
||||
<p>{$description}</p>
|
||||
{$article}
|
||||
EOD;
|
||||
|
||||
$this->items[] = $item;
|
||||
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
$url = $this->getURI();
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$regex = '/window.__PRELOADED_STATE__ = (.*);/';
|
||||
preg_match($regex, $html, $parts);
|
||||
$data = Json::decode($parts[1], false);
|
||||
foreach ($data->medias->index as $uid => $media) {
|
||||
$this->items[] = [
|
||||
'uid' => $uid,
|
||||
'title' => $media->metadata->slug,
|
||||
'uri' => $media->share_url,
|
||||
'timestamp' => $media->published_at,
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
@ -90,35 +60,14 @@ EOD;
|
||||
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
|
||||
return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category');
|
||||
}
|
||||
|
||||
return parent::getURI();
|
||||
}
|
||||
|
||||
public function getName()
|
||||
{
|
||||
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
|
||||
return $this->getKey('category') . ' - ' .
|
||||
$this->getKey('edition') . ' - Brut.';
|
||||
return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.';
|
||||
}
|
||||
|
||||
return parent::getName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract JSON from page
|
||||
*/
|
||||
private function extractJson($html)
|
||||
{
|
||||
if (!preg_match($this->jsonRegex, $html, $parts)) {
|
||||
returnServerError('Failed to extract data from page');
|
||||
}
|
||||
|
||||
$data = json_decode($parts[1]);
|
||||
|
||||
if ($data === false) {
|
||||
returnServerError('Failed to decode extracted data');
|
||||
}
|
||||
|
||||
return $data;
|
||||
}
|
||||
}
|
||||
|
@ -159,12 +159,12 @@ class BugzillaBridge extends BridgeAbstract
|
||||
protected function getUser($user)
|
||||
{
|
||||
// Check if the user endpoint is available
|
||||
if ($this->loadCacheValue($this->instance . 'userEndpointClosed', 86400)) {
|
||||
if ($this->loadCacheValue($this->instance . 'userEndpointClosed')) {
|
||||
return $user;
|
||||
}
|
||||
|
||||
$cache = $this->loadCacheValue($this->instance . $user);
|
||||
if (!is_null($cache)) {
|
||||
if ($cache) {
|
||||
return $cache;
|
||||
}
|
||||
|
||||
|
@ -206,7 +206,7 @@ class BukowskisBridge extends BridgeAbstract
|
||||
$this->items[] = [
|
||||
'title' => $title,
|
||||
'uri' => $baseUrl . $relative_url,
|
||||
'uid' => $lot->getAttribute('data-lot-id'),
|
||||
'uid' => $relative_url,
|
||||
'content' => count($images) > 0 ? "<img src='$images[0]'/><br/>$title" : $title,
|
||||
'enclosures' => array_slice($images, 1),
|
||||
];
|
||||
|
@ -71,7 +71,9 @@ class BundesbankBridge extends BridgeAbstract
|
||||
$item['content'] .= '<strong>' . $study->find('.teasable__subtitle', 0)->plaintext . '</strong>';
|
||||
}
|
||||
|
||||
$item['content'] .= '<p>' . $study->find('.teasable__text', 0)->plaintext . '</p>';
|
||||
$teasable = $study->find('.teasable__text', 0);
|
||||
$teasableText = $teasable->plaintext ?? '';
|
||||
$item['content'] .= '<p>' . $teasableText . '</p>';
|
||||
|
||||
$item['timestamp'] = strtotime($study->find('.teasable__date', 0)->plaintext);
|
||||
|
||||
|
@ -26,18 +26,16 @@ TMPL;
|
||||
https://www.bundestag.de/ajax/filterlist/de/parlament/praesidium/parteienfinanzierung/fundstellen50000/462002-462002
|
||||
URI;
|
||||
// Get the main page
|
||||
$html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT)
|
||||
or returnServerError('Could not request AJAX list.');
|
||||
$html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT);
|
||||
|
||||
// Build the URL from the first anchor element. The list is sorted by year, descending, so the first element is the current year.
|
||||
$firstAnchor = $html->find('a', 0)
|
||||
or returnServerError('Could not find the proper HTML element.');
|
||||
|
||||
$url = 'https://www.bundestag.de' . $firstAnchor->href;
|
||||
$url = $firstAnchor->href;
|
||||
|
||||
// Get the actual page with the soft money donations
|
||||
$html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT)
|
||||
or returnServerError('Could not request ' . $url);
|
||||
$html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT);
|
||||
|
||||
$rows = $html->find('table.table > tbody > tr')
|
||||
or returnServerError('Could not find the proper HTML elements.');
|
||||
|
28
bridges/BundesverbandFuerFreieKammernBridge.php
Normal file
28
bridges/BundesverbandFuerFreieKammernBridge.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
/**
 * XPath based bridge for the news list of bffk.de.
 *
 * All extraction is configured through the XPATH_EXPRESSION_* constants; only
 * the timestamp needs custom parsing (see formatItemTimestamp()).
 */
class BundesverbandFuerFreieKammernBridge extends XPathAbstract
{
    const NAME = 'Bundesverband für freie Kammern e.V.';
    const URI = 'https://www.bffk.de/aktuelles/aktuelle-nachrichten.html';
    const DESCRIPTION = 'Aktuelle Nachrichten';
    const MAINTAINER = 'hleskien';

    const FEED_SOURCE_URL = 'https://www.bffk.de/aktuelles/aktuelle-nachrichten.html';
    //const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href';
    const XPATH_EXPRESSION_ITEM = '//ul[@class="article-list"]/li';
    const XPATH_EXPRESSION_ITEM_TITLE = './/a/text()';
    const XPATH_EXPRESSION_ITEM_CONTENT = './/a/text()';
    const XPATH_EXPRESSION_ITEM_URI = './/a/@href';
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/span/i';
    //const XPATH_EXPRESSION_ITEM_ENCLOSURES = './';
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Converts a date written as "(dd.mm.yyyy)" into a Unix timestamp.
     *
     * @param string $value Raw text matched by XPATH_EXPRESSION_ITEM_TIMESTAMP.
     * @return int|null Timestamp of midnight on that day, or null when the
     *                  text is not a "d.m.Y" date.
     */
    protected function formatItemTimestamp($value)
    {
        // Strip the surrounding parentheses together with any whitespace
        // around them, otherwise the strict format below would not match.
        $value = trim($value, "() \t\n\r\0\x0B");

        // The leading "!" resets every field that is not part of the format,
        // so the time of day is 00:00:00 instead of "now".
        $dti = DateTimeImmutable::createFromFormat('!d.m.Y', $value);
        if ($dti === false) {
            // Unparsable date: report "no timestamp" instead of failing with
            // a method call on false.
            return null;
        }

        return $dti->getTimestamp();
    }
}
|
@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
class CNETBridge extends BridgeAbstract
|
||||
class CNETBridge extends SitemapBridge
|
||||
{
|
||||
const MAINTAINER = 'ORelio';
|
||||
const NAME = 'CNET News';
|
||||
@ -14,101 +14,105 @@ class CNETBridge extends BridgeAbstract
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'All articles' => '',
|
||||
'Apple' => 'apple',
|
||||
'Google' => 'google',
|
||||
'Microsoft' => 'tags-microsoft',
|
||||
'Computers' => 'topics-computers',
|
||||
'Mobile' => 'topics-mobile',
|
||||
'Sci-Tech' => 'topics-sci-tech',
|
||||
'Security' => 'topics-security',
|
||||
'Internet' => 'topics-internet',
|
||||
'Tech Industry' => 'topics-tech-industry'
|
||||
'Tech' => 'tech',
|
||||
'Money' => 'personal-finance',
|
||||
'Home' => 'home',
|
||||
'Wellness' => 'health',
|
||||
'Energy' => 'home/energy-and-utilities',
|
||||
'Deals' => 'deals',
|
||||
'Computing' => 'tech/computing',
|
||||
'Mobile' => 'tech/mobile',
|
||||
'Science' => 'science',
|
||||
'Services' => 'tech/services-and-software'
|
||||
]
|
||||
]
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
private function cleanArticle($article_html)
|
||||
{
|
||||
$offset_p = strpos($article_html, '<p>');
|
||||
$offset_figure = strpos($article_html, '<figure');
|
||||
$offset = ($offset_figure < $offset_p ? $offset_figure : $offset_p);
|
||||
$article_html = substr($article_html, $offset);
|
||||
$article_html = str_replace('href="/', 'href="' . self::URI, $article_html);
|
||||
$article_html = str_replace(' height="0"', '', $article_html);
|
||||
$article_html = str_replace('<noscript>', '', $article_html);
|
||||
$article_html = str_replace('</noscript>', '', $article_html);
|
||||
$article_html = StripWithDelimiters($article_html, '<a class="clickToEnlarge', '</a>');
|
||||
$article_html = stripWithDelimiters($article_html, '<span class="nowPlaying', '</span>');
|
||||
$article_html = stripWithDelimiters($article_html, '<span class="duration', '</span>');
|
||||
$article_html = stripWithDelimiters($article_html, '<script', '</script>');
|
||||
$article_html = stripWithDelimiters($article_html, '<svg', '</svg>');
|
||||
return $article_html;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
// Retrieve and check user input
|
||||
$topic = str_replace('-', '/', $this->getInput('topic'));
|
||||
if (!empty($topic) && (substr_count($topic, '/') > 1 || !ctype_alpha(str_replace('/', '', $topic)))) {
|
||||
returnClientError('Invalid topic: ' . $topic);
|
||||
$topic = $this->getInput('topic');
|
||||
$limit = $this->getInput('limit');
|
||||
$limit = empty($limit) ? 10 : $limit;
|
||||
|
||||
$url_pattern = empty($topic) ? '' : self::URI . $topic;
|
||||
$sitemap_latest = self::URI . 'sitemaps/article/' . date('Y/m') . '.xml';
|
||||
$sitemap_previous = self::URI . 'sitemaps/article/' . date('Y/m', strtotime('last day of previous month')) . '.xml';
|
||||
|
||||
$links = array_merge(
|
||||
$this->sitemapXmlToList($this->getSitemapXml($sitemap_latest, true), $url_pattern, $limit),
|
||||
$this->sitemapXmlToList($this->getSitemapXml($sitemap_previous, true), $url_pattern, $limit)
|
||||
);
|
||||
|
||||
if ($limit > 0 && count($links) > $limit) {
|
||||
$links = array_slice($links, 0, $limit);
|
||||
}
|
||||
|
||||
// Retrieve webpage
|
||||
$pageUrl = self::URI . (empty($topic) ? 'news/' : $topic . '/');
|
||||
$html = getSimpleHTMLDOM($pageUrl);
|
||||
if (empty($links)) {
|
||||
returnClientError('Failed to retrieve article list');
|
||||
}
|
||||
|
||||
// Process articles
|
||||
foreach ($html->find('div.assetBody, div.riverPost') as $element) {
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
foreach ($links as $article_uri) {
|
||||
$article_dom = convertLazyLoading(getSimpleHTMLDOMCached($article_uri));
|
||||
$title = trim($article_dom->find('h1', 0)->plaintext);
|
||||
$author = $article_dom->find('span.c-assetAuthor_name', 0);
|
||||
$headline = $article_dom->find('p.c-contentHeader_description', 0);
|
||||
$content = $article_dom->find('div.c-pageArticle_content, div.single-article__content, div.article-main-body', 0);
|
||||
$date = null;
|
||||
$enclosure = null;
|
||||
|
||||
$article_title = trim($element->find('h2, h3', 0)->plaintext);
|
||||
$article_uri = self::URI . substr($element->find('a', 0)->href, 1);
|
||||
$article_thumbnail = $element->parent()->find('img[src]', 0)->src;
|
||||
$article_timestamp = strtotime($element->find('time.assetTime, div.timeAgo', 0)->plaintext);
|
||||
$article_author = trim($element->find('a[rel=author], a.name', 0)->plaintext);
|
||||
$article_content = '<p><b>' . trim($element->find('p.dek', 0)->plaintext) . '</b></p>';
|
||||
|
||||
if (is_null($article_thumbnail)) {
|
||||
$article_thumbnail = extractFromDelimiters($element->innertext, '<img src="', '"');
|
||||
}
|
||||
|
||||
if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, self::URI . 'news/') !== false) {
|
||||
$article_html = getSimpleHTMLDOMCached($article_uri) or $article_html = null;
|
||||
|
||||
if (!is_null($article_html)) {
|
||||
if (empty($article_thumbnail)) {
|
||||
$article_thumbnail = $article_html->find('div.originalImage', 0);
|
||||
}
|
||||
if (empty($article_thumbnail)) {
|
||||
$article_thumbnail = $article_html->find('span.imageContainer', 0);
|
||||
}
|
||||
if (is_object($article_thumbnail)) {
|
||||
$article_thumbnail = $article_thumbnail->find('img', 0)->src;
|
||||
}
|
||||
|
||||
$article_content .= trim(
|
||||
$this->cleanArticle(
|
||||
extractFromDelimiters(
|
||||
$article_html,
|
||||
'<article',
|
||||
'<footer'
|
||||
)
|
||||
)
|
||||
);
|
||||
foreach ($article_dom->find('script[type=application/ld+json]') as $ldjson) {
|
||||
$datePublished = extractFromDelimiters($ldjson->innertext, '"datePublished":"', '"');
|
||||
if ($datePublished !== false) {
|
||||
$date = strtotime($datePublished);
|
||||
}
|
||||
$imageObject = extractFromDelimiters($ldjson->innertext, 'ImageObject","url":"', '"');
|
||||
if ($imageObject !== false) {
|
||||
$enclosure = $imageObject;
|
||||
}
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $article_uri;
|
||||
$item['title'] = $article_title;
|
||||
$item['author'] = $article_author;
|
||||
$item['timestamp'] = $article_timestamp;
|
||||
$item['enclosures'] = [$article_thumbnail];
|
||||
$item['content'] = $article_content;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
foreach ($content->find('div.c-shortcodeGallery') as $cleanup) {
|
||||
$cleanup->outertext = '';
|
||||
}
|
||||
|
||||
foreach ($content->find('figure') as $figure) {
|
||||
$img = $figure->find('img', 0);
|
||||
if ($img) {
|
||||
$figure->outertext = $img->outertext;
|
||||
}
|
||||
}
|
||||
|
||||
$content = $content->innertext;
|
||||
|
||||
if ($enclosure) {
|
||||
$content = "<div><img src=\"$enclosure\" /></div>" . $content;
|
||||
}
|
||||
|
||||
if ($headline) {
|
||||
$content = '<p><b>' . $headline->plaintext . '</b></p><br />' . $content;
|
||||
}
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $article_uri;
|
||||
$item['title'] = $title;
|
||||
|
||||
if ($author) {
|
||||
$item['author'] = $author->plaintext;
|
||||
}
|
||||
|
||||
$item['content'] = $content;
|
||||
|
||||
if (!is_null($date)) {
|
||||
$item['timestamp'] = $date;
|
||||
}
|
||||
|
||||
if (!is_null($enclosure)) {
|
||||
$item['enclosures'] = [$enclosure];
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -43,10 +43,8 @@ class CNETFranceBridge extends FeedExpander
|
||||
$this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
foreach ($this->bannedTitle as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['title']) === 1) {
|
||||
return null;
|
||||
@ -54,7 +52,7 @@ class CNETFranceBridge extends FeedExpander
|
||||
}
|
||||
|
||||
foreach ($this->bannedURL as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['uri']) === 1) {
|
||||
if (preg_match('#' . $term . '#mi', $item['uri'])) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -36,12 +36,43 @@ class CVEDetailsBridge extends BridgeAbstract
|
||||
private $vendor = '';
|
||||
private $product = '';
|
||||
|
||||
// Return the URL to query.
|
||||
// Because of the optional product ID, we need to attach it if it is
|
||||
// set. The search result page has the exact same structure (with and
|
||||
// without the product ID).
|
||||
private function buildUrl()
|
||||
public function collectData()
|
||||
{
|
||||
if ($this->html == null) {
|
||||
$this->fetchContent();
|
||||
}
|
||||
|
||||
$var = $this->html->find('#searchresults > div > div.row');
|
||||
foreach ($var as $i => $tr) {
|
||||
$uri = $tr->find('h3 > a', 0)->href ?? null;
|
||||
$title = $tr->find('h3 > a', 0)->innertext;
|
||||
$content = $tr->find('.cvesummarylong', 0)->innertext ?? '';
|
||||
$timestamp = $tr->find('[data-tsvfield="publishDate"]', 0)->innertext ?? 0;
|
||||
|
||||
$this->items[] = [
|
||||
'uri' => $uri,
|
||||
'title' => $title,
|
||||
'timestamp' => $timestamp,
|
||||
'content' => $content,
|
||||
'categories' => [$this->vendor],
|
||||
'enclosures' => [],
|
||||
'uid' => $title,
|
||||
];
|
||||
if (count($this->items) >= 30) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make the actual request to cvedetails.com and stores the response
|
||||
// (HTML) for later use and extract vendor and product from it.
|
||||
private function fetchContent()
|
||||
{
|
||||
// build url
|
||||
// Return the URL to query.
|
||||
// Because of the optional product ID, we need to attach it if it is
|
||||
// set. The search result page has the exact same structure (with and
|
||||
// without the product ID).
|
||||
$url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id');
|
||||
if ($this->getInput('product_id') !== '') {
|
||||
$url .= '/product_id-' . $this->getInput('product_id');
|
||||
@ -51,32 +82,21 @@ class CVEDetailsBridge extends BridgeAbstract
|
||||
// number, which should be mostly accurate.
|
||||
$url .= '?order=1'; // Order by CVE number DESC
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
// Make the actual request to cvedetails.com and stores the response
|
||||
// (HTML) for later use and extract vendor and product from it.
|
||||
private function fetchContent()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->buildUrl());
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$this->html = defaultLinkTo($html, self::URI);
|
||||
|
||||
$vendor = $html->find('#contentdiv > h1 > a', 0);
|
||||
$vendor = $html->find('#contentdiv h1 > a', 0);
|
||||
if ($vendor == null) {
|
||||
returnServerError('Invalid Vendor ID ' .
|
||||
$this->getInput('vendor_id') .
|
||||
' or Product ID ' .
|
||||
$this->getInput('product_id'));
|
||||
returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . $this->getInput('product_id'));
|
||||
}
|
||||
$this->vendor = $vendor->innertext;
|
||||
|
||||
$product = $html->find('#contentdiv > h1 > a', 1);
|
||||
$product = $html->find('#contentdiv h1 > a', 1);
|
||||
if ($product != null) {
|
||||
$this->product = $product->innertext;
|
||||
}
|
||||
}
|
||||
|
||||
// Build the name of the feed.
|
||||
public function getName()
|
||||
{
|
||||
if ($this->getInput('vendor_id') == '') {
|
||||
@ -94,52 +114,4 @@ class CVEDetailsBridge extends BridgeAbstract
|
||||
|
||||
return $name;
|
||||
}
|
||||
|
||||
// Pull the data from the HTML response and fill the items..
|
||||
public function collectData()
|
||||
{
|
||||
if ($this->html == null) {
|
||||
$this->fetchContent();
|
||||
}
|
||||
|
||||
foreach ($this->html->find('#vulnslisttable .srrowns') as $i => $tr) {
|
||||
// There are some optional vulnerability types, which will be
|
||||
// added to the categories as well as the CWE number -- which is
|
||||
// always given.
|
||||
$categories = [$this->vendor];
|
||||
$enclosures = [];
|
||||
|
||||
$cwe = $tr->find('td', 2)->find('a', 0);
|
||||
if ($cwe != null) {
|
||||
$cwe = $cwe->innertext;
|
||||
$categories[] = 'CWE-' . $cwe;
|
||||
$enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe . '.html';
|
||||
}
|
||||
$c = $tr->find('td', 4)->innertext;
|
||||
if (trim($c) != '') {
|
||||
$categories[] = $c;
|
||||
}
|
||||
if ($this->product != '') {
|
||||
$categories[] = $this->product;
|
||||
}
|
||||
|
||||
// The CVE number itself
|
||||
$title = $tr->find('td', 1)->find('a', 0)->innertext;
|
||||
|
||||
$this->items[] = [
|
||||
'uri' => $tr->find('td', 1)->find('a', 0)->href,
|
||||
'title' => $title,
|
||||
'timestamp' => $tr->find('td', 5)->innertext,
|
||||
'content' => $tr->next_sibling()->innertext,
|
||||
'categories' => $categories,
|
||||
'enclosures' => $enclosures,
|
||||
'uid' => $tr->find('td', 1)->find('a', 0)->innertext,
|
||||
];
|
||||
|
||||
// We only want to fetch the latest 10 CVEs
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,44 +1,118 @@
|
||||
<?php
|
||||
|
||||
class CarThrottleBridge extends FeedExpander
|
||||
class CarThrottleBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Car Throttle ';
|
||||
const URI = 'https://www.carthrottle.com';
|
||||
const NAME = 'Car Throttle';
|
||||
const URI = 'https://www.carthrottle.com/';
|
||||
const DESCRIPTION = 'Get the latest car-related news from Car Throttle.';
|
||||
const MAINTAINER = 't0stiman';
|
||||
const DONATION_URI = 'https://ko-fi.com/tostiman';
|
||||
|
||||
const PARAMETERS = [
|
||||
'Show articles from these categories:' => [
|
||||
'news' => [
|
||||
'name' => 'news',
|
||||
'type' => 'checkbox'
|
||||
],
|
||||
'reviews' => [
|
||||
'name' => 'reviews',
|
||||
'type' => 'checkbox'
|
||||
],
|
||||
'features' => [
|
||||
'name' => 'features',
|
||||
'type' => 'checkbox'
|
||||
],
|
||||
'videos' => [
|
||||
'name' => 'videos',
|
||||
'type' => 'checkbox'
|
||||
],
|
||||
'gaming' => [
|
||||
'name' => 'gaming',
|
||||
'type' => 'checkbox'
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('https://www.carthrottle.com/rss', 10);
|
||||
$this->items = [];
|
||||
|
||||
$this->handleCategory('news');
|
||||
$this->handleCategory('reviews');
|
||||
$this->handleCategory('features');
|
||||
$this->handleCategory2('videos', 'video');
|
||||
$this->handleCategory('gaming');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
private function handleCategory($category)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
if ($this->getInput($category)) {
|
||||
$this->getArticles($category);
|
||||
}
|
||||
}
|
||||
|
||||
//fetch page
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link)
|
||||
or returnServerError('Could not retrieve ' . $feedItem->link);
|
||||
private function handleCategory2($categoryParameter, $categoryURLname)
|
||||
{
|
||||
if ($this->getInput($categoryParameter)) {
|
||||
$this->getArticles($categoryURLname);
|
||||
}
|
||||
}
|
||||
|
||||
$subtitle = $articlePage->find('p.standfirst', 0);
|
||||
$article = $articlePage->find('div.content_field', 0);
|
||||
private function getArticles($category)
|
||||
{
|
||||
$categoryPage = getSimpleHTMLDOMCached(self::URI . $category);
|
||||
|
||||
$item['content'] = str_get_html($subtitle . $article);
|
||||
//for each post
|
||||
foreach ($categoryPage->find('div.cmg-card') as $post) {
|
||||
$item = [];
|
||||
|
||||
//convert <iframe>s to <a>s. meant for embedded videos.
|
||||
foreach ($item['content']->find('iframe') as $found) {
|
||||
$iframeUrl = $found->getAttribute('src');
|
||||
$titleElement = $post->find('a.title')[0];
|
||||
$post_uri = self::URI . $titleElement->getAttribute('href');
|
||||
|
||||
if ($iframeUrl) {
|
||||
$found->outertext = '<a href="' . $iframeUrl . '">' . $iframeUrl . '</a>';
|
||||
if (!isset($post_uri) || $post_uri == '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
$item['uri'] = $post_uri;
|
||||
$item['title'] = $titleElement->innertext;
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
$item['author'] = $this->parseAuthor($articlePage);
|
||||
|
||||
$articleImage = $articlePage->find('figure')[0];
|
||||
$article = $articlePage->find('div.first-column div.body')[0];
|
||||
|
||||
//remove ads
|
||||
foreach ($article->find('aside') as $ad) {
|
||||
$ad->outertext = '';
|
||||
}
|
||||
|
||||
$summary = $articlePage->find('div.summary')[0];
|
||||
|
||||
//these are supposed to be hidden
|
||||
foreach ($article->find('.visually-hidden') as $found) {
|
||||
$found->outertext = '';
|
||||
}
|
||||
|
||||
$item['content'] = $summary . $articleImage . $article;
|
||||
|
||||
array_push($this->items, $item);
|
||||
}
|
||||
}
|
||||
|
||||
private function parseAuthor($articlePage)
|
||||
{
|
||||
$authorDivs = $articlePage->find('div address');
|
||||
if (!$authorDivs) {
|
||||
return '';
|
||||
}
|
||||
|
||||
//remove scripts from the text
|
||||
foreach ($item['content']->find('script') as $remove) {
|
||||
$remove->outertext = '';
|
||||
$a = $authorDivs[0]->find('a')[0];
|
||||
if ($a) {
|
||||
return $a->innertext;
|
||||
}
|
||||
|
||||
return $item;
|
||||
return $authorDivs[0]->innertext;
|
||||
}
|
||||
}
|
||||
|
@ -34,10 +34,8 @@ class CaschyBridge extends FeedExpander
|
||||
);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) {
|
||||
return $item;
|
||||
}
|
||||
@ -56,7 +54,7 @@ class CaschyBridge extends FeedExpander
|
||||
{
|
||||
// remove unwanted stuff
|
||||
foreach (
|
||||
$article->find('div.video-container, div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content,
|
||||
$article->find('div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content,
|
||||
div.wp-embed, p.wp-caption-text, script') as $element
|
||||
) {
|
||||
$element->remove();
|
||||
|
266
bridges/CentreFranceBridge.php
Normal file
266
bridges/CentreFranceBridge.php
Normal file
@ -0,0 +1,266 @@
|
||||
<?php
|
||||
|
||||
class CentreFranceBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Centre France Newspapers';
|
||||
const URI = 'https://www.centrefrance.com/';
|
||||
const DESCRIPTION = 'Common bridge for all Centre France group newspapers.';
|
||||
const CACHE_TIMEOUT = 7200; // 2h
|
||||
const MAINTAINER = 'quent1';
|
||||
const PARAMETERS = [
|
||||
'global' => [
|
||||
'newspaper' => [
|
||||
'name' => 'Newspaper',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'La Montagne' => 'lamontagne.fr',
|
||||
'Le Populaire du Centre' => 'lepopulaire.fr',
|
||||
'La République du Centre' => 'larep.fr',
|
||||
'Le Berry Républicain' => 'leberry.fr',
|
||||
'L\'Yonne Républicaine' => 'lyonne.fr',
|
||||
'L\'Écho Républicain' => 'lechorepublicain.fr',
|
||||
'Le Journal du Centre' => 'lejdc.fr',
|
||||
'L\'Éveil de la Haute-Loire' => 'leveil.fr',
|
||||
'Le Pays' => 'le-pays.fr'
|
||||
]
|
||||
],
|
||||
'remove-reserved-for-subscribers-articles' => [
|
||||
'name' => 'Remove reserved for subscribers articles',
|
||||
'type' => 'checkbox',
|
||||
'title' => 'Filter out articles that are only available to subscribers'
|
||||
],
|
||||
'limit' => [
|
||||
'name' => 'Limit',
|
||||
'type' => 'number',
|
||||
'title' => 'How many articles to fetch. 0 to disable.',
|
||||
'required' => true,
|
||||
'defaultValue' => 15
|
||||
]
|
||||
],
|
||||
'Local news' => [
|
||||
'locality-slug' => [
|
||||
'name' => 'Locality slug',
|
||||
'type' => 'text',
|
||||
'required' => false,
|
||||
'title' => 'Fetch articles for a specific locality. If not set, headlines from the front page will be used instead.',
|
||||
'exampleValue' => 'moulins-03000'
|
||||
],
|
||||
]
|
||||
];
|
||||
|
||||
private static array $monthNumberByFrenchName = [
|
||||
'janvier' => 1, 'février' => 2, 'mars' => 3, 'avril' => 4, 'mai' => 5, 'juin' => 6, 'juillet' => 7,
|
||||
'août' => 8, 'septembre' => 9, 'octobre' => 10, 'novembre' => 11, 'décembre' => 12
|
||||
];
|
||||
|
||||
/**
 * Collects the articles listed on the selected newspaper's page.
 *
 * Reads the front page, or the locality page when 'locality-slug' is set,
 * detects articles through their '.c-titre' headings and completes each one
 * with the data returned by collectArticleData().
 */
public function collectData()
{
    $value = $this->getInput('limit');
    if (is_numeric($value) && (int)$value >= 0) {
        // Cast: the limit is compared to count() with === further down, which
        // could never match if a numeric string were kept here.
        $limit = (int)$value;
    } else {
        $limit = static::PARAMETERS['global']['limit']['defaultValue'];
    }

    $newspaper = $this->getInput('newspaper');
    if (empty($newspaper)) {
        return;
    }

    $localitySlug = $this->getInput('locality-slug') ?? '';
    $alreadyFoundArticlesURIs = [];

    $newspaperBaseUrl = 'https://www.' . $newspaper . '/';
    // Without a locality the front page is used; do not append an empty path
    // segment, which would produce a "//" at the end of the URL.
    $newspaperUrl = $localitySlug === '' ? $newspaperBaseUrl : $newspaperBaseUrl . $localitySlug . '/';
    $html = getSimpleHTMLDOM($newspaperUrl);

    // Articles are detected through their titles
    foreach ($html->find('.c-titre') as $titleBlock) {
        $articleLinkDOMElement = $titleBlock->find('a', 0);

        // A heading without a link cannot be turned into an item
        if ($articleLinkDOMElement === null) {
            continue;
        }

        // Ignore articles in the « Les + partagés » block
        if (str_contains((string)$articleLinkDOMElement->id, 'les_plus_partages')) {
            continue;
        }

        $articleURI = $articleLinkDOMElement->href;

        // If the URI has already been processed, ignore it
        if (in_array($articleURI, $alreadyFoundArticlesURIs, true)) {
            continue;
        }

        // If news are filtered for a specific locality, filter out article for other localities
        if ($localitySlug !== '' && !str_contains($articleURI, $localitySlug)) {
            continue;
        }

        $articleTitle = '';

        // If article is reserved for subscribers
        if ($articleLinkDOMElement->find('span.premium-picto', 0)) {
            if ($this->getInput('remove-reserved-for-subscribers-articles') === true) {
                continue;
            }

            $articleTitle .= '🔒 ';
        }

        $articleTitleDOMElement = $articleLinkDOMElement->find('span[data-tb-title]', 0);
        if ($articleTitleDOMElement === null) {
            continue;
        }

        // Stop before fetching another article page once the limit is reached
        if ($limit > 0 && count($this->items) === $limit) {
            break;
        }

        $articleTitle .= $articleTitleDOMElement->innertext;
        $articleFullURI = urljoin($newspaperBaseUrl, $articleURI);

        $item = [
            'title' => $articleTitle,
            'uri' => $articleFullURI,
            ...$this->collectArticleData($articleFullURI)
        ];
        $this->items[] = $item;

        $alreadyFoundArticlesURIs[] = $articleURI;
    }
}
|
||||
|
||||
/**
 * Fetch an article page and extract the feed item fields found on it.
 *
 * The page is retrieved through getSimpleHTMLDOMCached() with a 90-day lifetime.
 *
 * @param string $uri Absolute URI of the article page.
 * @return array Item fields: 'enclosures' and 'content' are always present; 'author',
 *               'timestamp', 'categories' and 'uid' only when they could be extracted.
 */
private function collectArticleData($uri): array
{
    $html = getSimpleHTMLDOMCached($uri, 86400 * 90); // 90d

    $item = [
        'enclosures' => [],
        // Defined up front: the "grand format" fallback and the final cleanup both read this key,
        // even when the regular content container is absent from the page.
        'content' => '',
    ];

    $articleInformations = $html->find('#content hgroup > div.typo-p3 > *');
    if (is_array($articleInformations) && $articleInformations !== []) {
        $publicationDateIndex = 0;

        // Article author: when present it is the first element and the date moves to the second one
        $probableAuthorName = strip_tags($articleInformations[0]->innertext);
        if (str_starts_with($probableAuthorName, 'Par ')) {
            $publicationDateIndex = 1;
            $item['author'] = substr($probableAuthorName, 4);
        }

        // Article publication date (the element may be missing when only the author is listed)
        $articleDateParts = [];
        if (isset($articleInformations[$publicationDateIndex])) {
            preg_match(
                '/Publié le (\d{2}) (.+) (\d{4})( à (\d{2})h(\d{2}))?/',
                strip_tags($articleInformations[$publicationDateIndex]->innertext),
                $articleDateParts
            );
        }

        if ($articleDateParts !== [] && array_key_exists($articleDateParts[2], self::$monthNumberByFrenchName)) {
            $articleDate = new \DateTime('midnight');
            $articleDate->setDate($articleDateParts[3], self::$monthNumberByFrenchName[$articleDateParts[2]], $articleDateParts[1]);

            // Seven entries means the optional " à HHhMM" part of the pattern matched as well
            if (count($articleDateParts) === 7) {
                $articleDate->setTime($articleDateParts[5], $articleDateParts[6]);
            }

            $item['timestamp'] = $articleDate->getTimestamp();
        }
    }

    $articleContent = $html->find('#content>div.flex+div.grid section>.z-10')[0] ?? null;
    if ($articleContent instanceof \simple_html_dom_node) {
        // Strip ad slots and the embedded video player before exporting the markup
        $articleHiddenParts = $articleContent->find('.ad-slot, #cf-digiteka-player');
        if (is_array($articleHiddenParts)) {
            foreach ($articleHiddenParts as $articleHiddenPart) {
                $articleContent->removeChild($articleHiddenPart);
            }
        }

        $item['content'] = $articleContent->innertext;
    }

    // Only attach the illustration when exactly one candidate image is found
    $articleIllustration = $html->find('#content>div.flex+div.grid section>figure>img');
    if (is_array($articleIllustration) && count($articleIllustration) === 1) {
        $item['enclosures'][] = $articleIllustration[0]->getAttribute('src');
    }

    // Same rule for the audio version of the article
    $articleAudio = $html->find('audio[src^="https://api.octopus.saooti.com/"]');
    if (is_array($articleAudio) && count($articleAudio) === 1) {
        $item['enclosures'][] = $articleAudio[0]->getAttribute('src');
    }

    $articleTags = $html->find('#content>div.flex+div.grid section>.bg-gray-light>a.border-gray-dark');
    if (is_array($articleTags)) {
        $item['categories'] = array_map(static fn ($articleTag) => $articleTag->innertext, $articleTags);
    }

    // The text after the last underscore of the URI is used as uid when it is numeric
    $uriSegments = explode('_', $uri);
    $uid = rtrim(end($uriSegments), '/');
    if (is_numeric($uid)) {
        $item['uid'] = $uid;
    }

    // If the article is a "grand format", we use another parsing strategy
    if ($item['content'] === '' && $html->find('article') !== []) {
        foreach ($html->find('article > section') as $contentPart) {
            // find() with an index returns a single node (or null), not an array
            $journalist = $contentPart->find('#journo', 0);
            if ($journalist !== null) {
                $item['author'] = $journalist->innertext;
                continue;
            }

            $item['content'] .= $contentPart->innertext;
        }
    }

    $item['content'] = str_replace('<span class="p-premium">premium</span>', '🔒', $item['content']);
    $item['content'] = trim($item['content']);

    return $item;
}
|
||||
|
||||
/**
 * Build the feed name from the selected newspaper and, when set, the locality.
 *
 * Falls back to the bridge's static NAME when no newspaper is selected or when the
 * selected domain is not one of the declared 'newspaper' values.
 *
 * @return string Feed name
 */
public function getName()
{
    $domain = $this->getInput('newspaper');
    if (empty($domain)) {
        return static::NAME;
    }

    // The parameter declaration maps display name => domain; flip it to look up by domain
    $nameByDomain = array_flip(self::PARAMETERS['global']['newspaper']['values']);
    if (!isset($nameByDomain[$domain])) {
        return static::NAME;
    }

    $title = $nameByDomain[$domain];

    $slug = $this->getInput('locality-slug');
    if (empty($slug)) {
        return $title;
    }

    // Drop the last dash-separated segment of the slug (presumably a postal code,
    // judging by the pattern accepted in detectParameters()) and keep the locality name
    $nameSegments = array_slice(explode('-', $slug), 0, -1);

    return $title . ' ' . ucfirst(implode('-', $nameSegments));
}
|
||||
|
||||
/**
 * Return the favicon URL of the selected newspaper, or of the bridge URI when none is selected.
 *
 * @return string Favicon URL
 */
public function getIcon()
{
    $domain = $this->getInput('newspaper');
    $siteRoot = empty($domain) ? static::URI : 'https://www.' . $domain;

    return $siteRoot . '/favicon.ico';
}
|
||||
|
||||
/**
 * Derive bridge parameters from a newspaper URL.
 *
 * Accepted shape (case-insensitive): optional scheme, optional "www.", a ".fr" domain,
 * and an optional "<name>-<5 digits>" locality slug as the only path segment.
 *
 * @param string $url URL to inspect
 * @return array|null 'newspaper' and 'locality-slug' values, or null when the URL is not
 *                    recognised or the domain is not a declared newspaper
 */
public function detectParameters($url)
{
    $pattern = '/^(https?:\/\/)?(www\.)?([a-z-]+\.fr)(\/)?([a-z-]+-[0-9]{5})?(\/)?$/';

    if (preg_match($pattern, strtolower($url), $parts) === 0) {
        return null;
    }

    $knownDomains = self::PARAMETERS['global']['newspaper']['values'];
    if (!in_array($parts[3], $knownDomains, true)) {
        return null;
    }

    // Group 5 is absent or empty when the URL carries no locality slug
    $localitySlug = null;
    if (!empty($parts[5])) {
        $localitySlug = $parts[5];
    }

    return [
        'newspaper' => $parts[3],
        'locality-slug' => $localitySlug
    ];
}
|
||||
}
|
@ -18,25 +18,6 @@ class CeskaTelevizeBridge extends BridgeAbstract
|
||||
]
|
||||
];
|
||||
|
||||
private function fixChars($text)
|
||||
{
|
||||
return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
|
||||
}
|
||||
|
||||
private function getUploadTimeFromString($string)
|
||||
{
|
||||
if (strpos($string, 'dnes') !== false) {
|
||||
return strtotime('today');
|
||||
} elseif (strpos($string, 'včera') !== false) {
|
||||
return strtotime('yesterday');
|
||||
} elseif (!preg_match('/(\d+).\s(\d+).(\s(\d+))?/', $string, $match)) {
|
||||
returnServerError('Could not get date from Česká televize string');
|
||||
}
|
||||
|
||||
$date = sprintf('%04d-%02d-%02d', $match[3] ?? date('Y'), $match[2], $match[1]);
|
||||
return strtotime($date);
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = $this->getInput('url');
|
||||
@ -57,25 +38,43 @@ class CeskaTelevizeBridge extends BridgeAbstract
|
||||
$this->feedName .= " ({$category})";
|
||||
}
|
||||
|
||||
foreach ($html->find('#episodeListSection a[data-testid=next-link]') as $element) {
|
||||
$itemTitle = $element->find('h3', 0);
|
||||
$itemContent = $element->find('div[class^=content-]', 0);
|
||||
$itemDate = $element->find('div[class^=playTime-] span', 0);
|
||||
$itemThumbnail = $element->find('img', 0);
|
||||
$itemUri = self::URI . $element->getAttribute('href');
|
||||
foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) {
|
||||
$itemContent = $element->find('p[class^=content-]', 0);
|
||||
$itemDate = $element->find('div[class^=playTime-] span, [data-testid=episode-item-broadcast] span', 0);
|
||||
|
||||
// Remove special characters and whitespace
|
||||
$cleanDate = preg_replace('/[^0-9.]/', '', $itemDate->plaintext);
|
||||
|
||||
$item = [
|
||||
'title' => $this->fixChars($itemTitle->plaintext),
|
||||
'uri' => $itemUri,
|
||||
'content' => '<img src="' . $itemThumbnail->getAttribute('src') . '" /><br />'
|
||||
. $this->fixChars($itemContent->plaintext),
|
||||
'timestamp' => $this->getUploadTimeFromString($itemDate->plaintext)
|
||||
'title' => $this->fixChars($element->find('h3', 0)->plaintext),
|
||||
'uri' => self::URI . $element->getAttribute('href'),
|
||||
'content' => '<img src="' . $element->find('img', 0)->getAttribute('srcset') . '" /><br />' . $this->fixChars($itemContent->plaintext),
|
||||
'timestamp' => $this->getUploadTimeFromString($cleanDate),
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convert a broadcast date label into a Unix timestamp.
 *
 * Recognises the relative labels "dnes" and "včera" as well as numeric dates written
 * day.month.[year], with or without whitespace after the dots. The current year is
 * used when the year is missing.
 *
 * @param string $string Date label
 * @return int|false Timestamp as returned by strtotime()
 */
private function getUploadTimeFromString($string)
{
    if (strpos($string, 'dnes') !== false) {
        return strtotime('today');
    }

    if (strpos($string, 'včera') !== false) {
        return strtotime('yesterday');
    }

    // The dots are escaped on purpose: an unescaped "." matches any character, which lets
    // digits be consumed as separators and produces a wrong day/month instead of an error.
    if (!preg_match('/(\d+)\.\s*(\d+)\.\s*(\d+)?/', $string, $match)) {
        // NOTE(review): returnServerError() is presumed to throw, so $match is always
        // populated below; confirm against the helper's definition
        returnServerError('Could not get date from Česká televize string');
    }

    // $match[3] is absent when the label carries no year
    $date = sprintf('%04d-%02d-%02d', $match[3] ?? date('Y'), $match[2], $match[1]);

    return strtotime($date);
}
|
||||
|
||||
/**
 * Decode HTML entities (single and double quotes included) into UTF-8 characters.
 *
 * @param string $text Text possibly containing HTML entities
 * @return string Decoded text
 */
private function fixChars($text)
{
    $decoded = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

    return $decoded;
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
return $this->feedUri ?? parent::getURI();
|
||||
|
@ -79,9 +79,9 @@ class CodebergBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->getURI());
|
||||
|
||||
$html = defaultLinkTo($html, $this->getURI());
|
||||
$url = $this->getURI();
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$html = defaultLinkTo($html, $url);
|
||||
|
||||
switch ($this->queriedContext) {
|
||||
case 'Commits':
|
||||
@ -181,7 +181,12 @@ class CodebergBridge extends BridgeAbstract
|
||||
$item['title'] = $message->find('span.message-wrapper', 0)->plaintext;
|
||||
$item['uri'] = $tr->find('td.sha', 0)->find('a', 0)->href;
|
||||
$item['author'] = $tr->find('td.author', 0)->plaintext;
|
||||
$item['timestamp'] = $tr->find('td', 3)->find('span', 0)->title;
|
||||
|
||||
$var = $tr->find('td', 3);
|
||||
$var1 = $var->find('span', 0);
|
||||
if ($var1) {
|
||||
$item['timestamp'] = $var1->title;
|
||||
}
|
||||
|
||||
if ($message->find('pre.commit-body', 0)) {
|
||||
$message->find('pre.commit-body', 0)->style = '';
|
||||
@ -200,17 +205,22 @@ class CodebergBridge extends BridgeAbstract
|
||||
*/
|
||||
private function extractIssues($html)
|
||||
{
|
||||
$div = $html->find('div.issue.list', 0);
|
||||
$issueList = $html->find('div#issue-list', 0);
|
||||
|
||||
foreach ($div->find('li.item') as $li) {
|
||||
foreach ($issueList->find('div.flex-item') as $div) {
|
||||
$item = [];
|
||||
|
||||
$number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
$number = trim($div->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
|
||||
$item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $li->find('a.title', 0)->href;
|
||||
$item['timestamp'] = $li->find('span.time-since', 0)->title;
|
||||
$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
$item['title'] = $div->find('a.issue-title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $div->find('a.issue-title', 0)->href;
|
||||
|
||||
$time = $div->find('relative-time.time-since', 0);
|
||||
if ($time) {
|
||||
$item['timestamp'] = $time->datetime;
|
||||
}
|
||||
|
||||
//$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
|
||||
// Fetch issue page
|
||||
$issuePage = getSimpleHTMLDOMCached($item['uri'], 3600);
|
||||
@ -218,7 +228,7 @@ class CodebergBridge extends BridgeAbstract
|
||||
|
||||
$item['content'] = $issuePage->find('div.timeline-item.comment.first', 0)->find('div.render-content.markup', 0);
|
||||
|
||||
foreach ($li->find('a.ui.label') as $label) {
|
||||
foreach ($div->find('a.ui.label') as $label) {
|
||||
$item['categories'][] = $label->plaintext;
|
||||
}
|
||||
|
||||
@ -250,7 +260,11 @@ class CodebergBridge extends BridgeAbstract
|
||||
}
|
||||
|
||||
$item['author'] = $div->find('a.author', 0)->innertext;
|
||||
$item['timestamp'] = $div->find('span.time-since', 0)->title;
|
||||
|
||||
$timeSince = $div->find('span.time-since', 0);
|
||||
if ($timeSince) {
|
||||
$item['timestamp'] = $timeSince->title;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
@ -261,23 +275,36 @@ class CodebergBridge extends BridgeAbstract
|
||||
*/
|
||||
private function extractPulls($html)
|
||||
{
|
||||
$div = $html->find('div.issue.list', 0);
|
||||
$div = $html->find('div#issue-list', 0);
|
||||
|
||||
foreach ($div->find('li.item') as $li) {
|
||||
$var2 = $div->find('div.flex-item');
|
||||
foreach ($var2 as $li) {
|
||||
$item = [];
|
||||
|
||||
$number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
|
||||
$item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $li->find('a.title', 0)->href;
|
||||
$item['timestamp'] = $li->find('span.time-since', 0)->title;
|
||||
$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
$a = $li->find('a.issue-title', 0);
|
||||
$item['title'] = $a->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $a->href;
|
||||
|
||||
$time = $li->find('relative-time.time-since', 0);
|
||||
if ($time) {
|
||||
$item['timestamp'] = $time->datetime;
|
||||
}
|
||||
|
||||
// Extracting the author is a bit awkward after they changed their html
|
||||
//$desc = $li->find('div.desc', 0);
|
||||
//$item['author'] = $desc->find('a', 1)->plaintext;
|
||||
|
||||
// Fetch pull request page
|
||||
$pullRequestPage = getSimpleHTMLDOMCached($item['uri'], 3600);
|
||||
$pullRequestPage = defaultLinkTo($pullRequestPage, self::URI);
|
||||
|
||||
$item['content'] = $pullRequestPage->find('ui.timeline', 0)->find('div.render-content.markup', 0);
|
||||
$var = $pullRequestPage->find('ui.timeline', 0);
|
||||
if ($var) {
|
||||
$var1 = $var->find('div.render-content.markup', 0);
|
||||
$item['content'] = $var1;
|
||||
}
|
||||
|
||||
foreach ($li->find('a.ui.label') as $label) {
|
||||
$item['categories'][] = $label->plaintext;
|
||||
@ -380,6 +407,9 @@ EOD;
|
||||
*/
|
||||
private function stripSvg($html)
|
||||
{
|
||||
if ($html === null) {
|
||||
return null;
|
||||
}
|
||||
if ($html->find('svg', 0)) {
|
||||
$html->find('svg', 0)->outertext = '';
|
||||
}
|
||||
|
@ -2,59 +2,65 @@
|
||||
|
||||
class ComicsKingdomBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'stjohnjohnson';
|
||||
const MAINTAINER = 'TReKiE';
|
||||
// const MAINTAINER = 'stjohnjohnson';
|
||||
const NAME = 'Comics Kingdom Unofficial RSS';
|
||||
const URI = 'https://comicskingdom.com/';
|
||||
const URI = 'https://wp.comicskingdom.com/wp-json/wp/v2/ck_comic';
|
||||
const CACHE_TIMEOUT = 21600; // 6h
|
||||
const DESCRIPTION = 'Comics Kingdom Unofficial RSS';
|
||||
const PARAMETERS = [ [
|
||||
'comicname' => [
|
||||
'name' => 'comicname',
|
||||
'name' => 'Name of comic',
|
||||
'type' => 'text',
|
||||
'exampleValue' => 'mutts',
|
||||
'title' => 'The name of the comic in the URL after https://comicskingdom.com/',
|
||||
'required' => true
|
||||
],
|
||||
'limit' => [
|
||||
'name' => 'Limit',
|
||||
'type' => 'number',
|
||||
'title' => 'The number of recent comics to get',
|
||||
'defaultValue' => 10
|
||||
]
|
||||
]];
|
||||
|
||||
protected $comicName;
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->getURI(), [], [], true, false);
|
||||
$json = getContents($this->getURI());
|
||||
$data = json_decode($json, false);
|
||||
|
||||
// Get author from first page
|
||||
$author = $html->find('div.author p', 0);
|
||||
;
|
||||
if (isset($data[0]->_embedded->{'wp:term'}[0][0])) {
|
||||
$this->comicName = $data[0]->_embedded->{'wp:term'}[0][0]->name;
|
||||
}
|
||||
|
||||
// Get current date/link
|
||||
$link = $html->find('meta[property=og:url]', -1)->content;
|
||||
for ($i = 0; $i < 3; $i++) {
|
||||
foreach ($data as $comicitem) {
|
||||
$item = [];
|
||||
|
||||
$page = getSimpleHTMLDOM($link);
|
||||
|
||||
$imagelink = $page->find('meta[property=og:image]', 0)->content;
|
||||
|
||||
$date = explode('/', $link);
|
||||
|
||||
$item['id'] = $imagelink;
|
||||
$item['uri'] = $link;
|
||||
$item['author'] = $author;
|
||||
$item['title'] = 'Comics Kingdom ' . $this->getInput('comicname');
|
||||
$item['timestamp'] = DateTime::createFromFormat('Y-m-d', $date[count($date) - 1])->getTimestamp();
|
||||
$item['content'] = '<img src="' . $imagelink . '" />';
|
||||
|
||||
$item['id'] = $comicitem->id;
|
||||
$item['uri'] = $comicitem->yoast_head_json->og_url;
|
||||
$item['author'] = str_ireplace('By ', '', $comicitem->ck_comic_byline);
|
||||
$item['title'] = $comicitem->yoast_head_json->title;
|
||||
$item['timestamp'] = $comicitem->date;
|
||||
$item['content'] = '<img src="' . $comicitem->yoast_head_json->og_image[0]->url . '" />';
|
||||
$this->items[] = $item;
|
||||
$link = $page->find('div.comic-viewer-inline a', 0)->href;
|
||||
if (empty($link)) {
|
||||
break; // allow bridge to continue if there's less than 3 comics
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
if (!is_null($this->getInput('comicname'))) {
|
||||
return self::URI . urlencode($this->getInput('comicname'));
|
||||
$params = [
|
||||
'ck_feature' => $this->getInput('comicname'),
|
||||
'per_page' => $this->getInput('limit'),
|
||||
'date_inclusive' => 'true',
|
||||
'order' => 'desc',
|
||||
'page' => '1',
|
||||
'_embed' => 'true'
|
||||
];
|
||||
|
||||
return self::URI . '?' . http_build_query($params);
|
||||
}
|
||||
|
||||
return parent::getURI();
|
||||
@ -62,8 +68,8 @@ class ComicsKingdomBridge extends BridgeAbstract
|
||||
|
||||
public function getName()
|
||||
{
|
||||
if (!is_null($this->getInput('comicname'))) {
|
||||
return $this->getInput('comicname') . ' - Comics Kingdom';
|
||||
if ($this->comicName) {
|
||||
return $this->comicName . ' - Comics Kingdom';
|
||||
}
|
||||
|
||||
return parent::getName();
|
||||
|
@ -12,9 +12,8 @@ class CommonDreamsBridge extends FeedExpander
|
||||
$this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
@ -13,11 +13,9 @@ class CourrierInternationalBridge extends FeedExpander
|
||||
$this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $articlePage->find('.article-text, depeche-text', 0);
|
||||
if (!$content) {
|
||||
return $item;
|
||||
|
@ -109,7 +109,7 @@ class CrewbayBridge extends BridgeAbstract
|
||||
public function collectData()
|
||||
{
|
||||
$url = $this->getURI();
|
||||
$html = getSimpleHTMLDOM($url) or returnClientError('No results for this query.');
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
|
||||
$annonces = $html->find('#SearchResults div.result');
|
||||
$limit = 0;
|
||||
|
309
bridges/CssSelectorBridge.php
Normal file
309
bridges/CssSelectorBridge.php
Normal file
@ -0,0 +1,309 @@
|
||||
<?php
|
||||
|
||||
/**
 * Generic bridge turning an arbitrary web page into a feed using user-supplied CSS selectors.
 *
 * The home page is searched for elements matching the URL selector; each match becomes a
 * feed item. When a content selector is given, every article page is fetched as well and
 * the item is rebuilt from that page.
 */
class CssSelectorBridge extends BridgeAbstract
{
    const MAINTAINER = 'ORelio';
    const NAME = 'CSS Selector Bridge';
    const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
    const DESCRIPTION = 'Convert any site to RSS feed using CSS selectors (Advanced Users)';
    const PARAMETERS = [
        [
            'home_page' => [
                'name' => 'Site URL: Home page with latest articles',
                'exampleValue' => 'https://example.com/blog/',
                'required' => true
            ],
            'url_selector' => [
                'name' => 'Selector for article links or their parent elements',
                'title' => <<<EOT
                    This bridge works using CSS selectors, e.g. "a.article" will match all <a class="article"
                    href="URL">TITLE</a> on home page, each one being treated as a feed item.
                    Instead of just a link you can selet one of its parent element. Everything inside that
                    element becomes feed item content, e.g. image and summary present on home page.
                    When doing so, the first link inside the selected element becomes feed item URL/Title.
                    EOT,
                'exampleValue' => 'a.article',
                'required' => true
            ],
            'url_pattern' => [
                'name' => '[Optional] Pattern for site URLs to keep in feed',
                'title' => 'Optionally filter items by applying a regular expression on their URL',
                'exampleValue' => '/blog/article/.*',
            ],
            'content_selector' => [
                'name' => '[Optional] Selector to expand each article content',
                'title' => <<<EOT
                    When specified, the bridge will fetch each article from its URL
                    and extract content using the provided selector (Slower!)
                    EOT,
                'exampleValue' => 'article.content',
            ],
            'content_cleanup' => [
                'name' => '[Optional] Content cleanup: List of items to remove',
                'title' => 'Selector for unnecessary elements to remove inside article contents.',
                'exampleValue' => 'div.ads, div.comments',
            ],
            'title_cleanup' => [
                'name' => '[Optional] Text to remove from expanded article title',
                'title' => <<<EOT
                    When fetching each article page, feed item title comes from page title.
                    Specify here some text from page title that need to be removed, e.g. " | BlogName".
                    EOT,
                'exampleValue' => ' | BlogName',
            ],
            'discard_thumbnail' => [
                'name' => '[Optional] Discard thumbnail set by site author',
                'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
                'type' => 'checkbox',
            ],
            'thumbnail_as_header' => [
                'name' => '[Optional] Insert thumbnail as article header',
                'title' => 'Insert article main image on top of article contents.',
                'type' => 'checkbox',
            ],
            'limit' => self::LIMIT
        ]
    ];

    /** @var string Feed title taken from the home page; empty until collectData() has run */
    protected $feedName = '';

    /** @var string Home page URL given by the user; empty until collectData() has run */
    protected $homepageUrl = '';

    /**
     * Return the home page URL once known, the default bridge URI otherwise.
     * @return string Feed URI
     */
    public function getURI()
    {
        $url = $this->homepageUrl;
        if (empty($url)) {
            $url = parent::getURI();
        }
        return $url;
    }

    /**
     * Return the feed title extracted from the home page, the default bridge name otherwise.
     * @return string Feed name
     */
    public function getName()
    {
        if (!empty($this->feedName)) {
            return $this->feedName;
        }
        return parent::getName();
    }

    /**
     * Read the user inputs, list the entries found on the home page and, when a content
     * selector is provided, expand each entry from its own article page.
     */
    public function collectData()
    {
        $this->homepageUrl = $this->getInput('home_page');
        $url_selector = $this->getInput('url_selector');
        $url_pattern = $this->getInput('url_pattern');
        $content_selector = $this->getInput('content_selector');
        $content_cleanup = $this->getInput('content_cleanup');
        $title_cleanup = $this->getInput('title_cleanup');
        $discard_thumbnail = $this->getInput('discard_thumbnail');
        $thumbnail_as_header = $this->getInput('thumbnail_as_header');
        $limit = $this->getInput('limit') ?? 10;

        $html = defaultLinkTo(getSimpleHTMLDOM($this->homepageUrl), $this->homepageUrl);
        $this->feedName = $this->titleCleanup($this->getPageTitle($html), $title_cleanup);
        $items = $this->htmlFindEntries($html, $url_selector, $url_pattern, $limit, $content_cleanup);

        if (empty($content_selector)) {
            // No expansion requested: home page entries are the feed items
            $this->items = $items;
            return;
        }

        foreach ($items as $item) {
            $item = $this->expandEntryWithSelector(
                $item['uri'],
                $content_selector,
                $content_cleanup,
                $title_cleanup,
                $item['title']
            );
            if ($discard_thumbnail && isset($item['enclosures'])) {
                unset($item['enclosures']);
            }
            // Evaluated after the discard above, so a discarded thumbnail is never inserted
            if ($thumbnail_as_header && isset($item['enclosures'][0])) {
                $item['content'] = '<p><img src="' . $item['enclosures'][0] . '" /></p>' . $item['content'];
            }
            $this->items[] = $item;
        }
    }

    /**
     * Filter a list of URLs using a pattern and limit
     * @param array $links List of URLs
     * @param string $url_pattern Pattern to look for in URLs
     * @param int $limit Optional maximum amount of URLs to return
     * @return array Array of URLs
     */
    protected function filterUrlList($links, $url_pattern, $limit = 0)
    {
        if (!empty($url_pattern)) {
            // The user pattern is written without delimiters: escape slashes and wrap it
            $url_pattern = '/' . str_replace('/', '\/', $url_pattern) . '/';
            $links = array_filter($links, function ($url) use ($url_pattern) {
                return preg_match($url_pattern, $url) === 1;
            });
        }

        if ($limit > 0 && count($links) > $limit) {
            $links = array_slice($links, 0, $limit);
        }

        return $links;
    }

    /**
     * Retrieve title from webpage URL or DOM
     * @param string|object $page URL or DOM to retrieve title from
     * @return string Webpage title, or an empty string when the page has no title element
     */
    protected function getPageTitle($page)
    {
        if (is_string($page)) {
            $page = getSimpleHTMLDOMCached($page);
        }

        // Pages without a <title> element yield null here; callers already handle empty titles
        $title_element = $page->find('title', 0);
        if (is_null($title_element)) {
            return '';
        }

        return html_entity_decode($title_element->plaintext);
    }

    /**
     * Clean Article title. Remove constant part that appears in every title such as blog name.
     * @param string $title Title to clean, e.g. "Article Name | BlogName"
     * @param string $title_cleanup string to remove from webpage title, e.g. " | BlogName"
     * @return string Cleaned Title
     */
    protected function titleCleanup($title, $title_cleanup)
    {
        if (!empty($title) && !empty($title_cleanup)) {
            return trim(str_replace($title_cleanup, '', $title));
        }
        return $title;
    }

    /**
     * Remove all elements from HTML content matching cleanup selector
     * @param string|object $content HTML content as HTML object or string
     * @param string|null $cleanup_selector Selector of the elements to remove; nothing is removed when empty
     * @return string|object Cleaned content (same type as input)
     */
    protected function cleanArticleContent($content, $cleanup_selector)
    {
        $string_convert = false;
        if (is_string($content)) {
            $parsed = str_get_html($content);
            if (!$parsed) {
                // The string could not be parsed (e.g. empty input): nothing to clean
                return $content;
            }
            $string_convert = true;
            $content = $parsed;
        }

        if (!empty($cleanup_selector)) {
            foreach ($content->find($cleanup_selector) as $item_to_clean) {
                $item_to_clean->outertext = '';
            }
        }

        if ($string_convert) {
            $content = $content->outertext;
        }
        return $content;
    }

    /**
     * Retrieve first N link+title+truncated-content from webpage URL or DOM satisfying the specified criteria
     * @param string|object $page URL or DOM to retrieve feed items from
     * @param string $url_selector DOM selector for matching links or their parent element
     * @param string $url_pattern Optional filter to keep only links matching the pattern
     * @param int $limit Optional maximum amount of URLs to return
     * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments"
     * @return array of items {'uri': entry_url, 'title': entry_title, ['content': when present in DOM] }
     */
    protected function htmlFindEntries($page, $url_selector, $url_pattern = '', $limit = 0, $content_cleanup = null)
    {
        if (is_string($page)) {
            $page = getSimpleHTMLDOM($page);
        }

        $links = $page->find($url_selector);

        if (empty($links)) {
            returnClientError('No results for URL selector');
        }

        // Indexed by URL so that several matches pointing to the same article yield one item
        $link_to_item = [];
        foreach ($links as $link) {
            $item = [];
            // Markup beyond plain text (image, summary...) is kept as item content
            if ($link->innertext != $link->plaintext) {
                $item['content'] = $link->innertext;
            }
            if ($link->tag != 'a') {
                // A parent element was selected: its first link gives the item URL and title
                $link = $link->find('a', 0);
                if (is_null($link)) {
                    continue;
                }
            }
            if (empty($link->href)) {
                // An anchor without href cannot identify an article
                continue;
            }
            $item['uri'] = $link->href;
            $item['title'] = $link->plaintext;
            if (isset($item['content'])) {
                $item['content'] = convertLazyLoading($item['content']);
                $item['content'] = defaultLinkTo($item['content'], $item['uri']);
                $item['content'] = $this->cleanArticleContent($item['content'], $content_cleanup);
            }
            $link_to_item[$link->href] = $item;
        }

        if (empty($link_to_item)) {
            returnClientError('The provided URL selector matches some elements, but they do not contain links.');
        }

        $links = $this->filterUrlList(array_keys($link_to_item), $url_pattern, $limit);

        if (empty($links)) {
            returnClientError('No results for URL pattern');
        }

        $items = [];
        foreach ($links as $link) {
            $items[] = $link_to_item[$link];
        }

        return $items;
    }

    /**
     * Retrieve article content from its URL using content selector and return a feed item
     * @param string $entry_url URL to retrieve article from
     * @param string $content_selector HTML selector for extracting content, e.g. "article.content"
     * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments"
     * @param string $title_cleanup Optional string to remove from article title, e.g. " | BlogName"
     * @param string $title_default Optional title to use when could not extract title reliably
     * @return array Entry data: uri, title, content
     */
    protected function expandEntryWithSelector($entry_url, $content_selector, $content_cleanup = null, $title_cleanup = null, $title_default = null)
    {
        if (empty($content_selector)) {
            returnClientError('Please specify a content selector');
        }

        $entry_html = getSimpleHTMLDOMCached($entry_url);
        $item = html_find_seo_metadata($entry_html);

        if (empty($item['uri'])) {
            $item['uri'] = $entry_url;
        }

        if (empty($item['title'])) {
            $article_title = $this->getPageTitle($entry_html);
            // A page title equal to the feed name says nothing about the article itself
            if (!empty($title_default) && (empty($article_title) || $article_title === $this->feedName)) {
                $article_title = $title_default;
            }
            $item['title'] = $article_title;
        }

        $item['title'] = $this->titleCleanup($item['title'], $title_cleanup);

        $article_content = $entry_html->find($content_selector);

        if (!empty($article_content)) {
            // Only the first element matching the content selector is used
            $article_content = $article_content[0];
            $article_content = convertLazyLoading($article_content);
            $article_content = defaultLinkTo($article_content, $entry_url);
            $article_content = $this->cleanArticleContent($article_content, $content_cleanup);
            $item['content'] = $article_content;
        } elseif (!empty($item['content'])) {
            $item['content'] .= '<br /><p><em>Could not extract full content, selector may need to be updated.</em></p>';
        }

        return $item;
    }
}
|
462
bridges/CssSelectorComplexBridge.php
Normal file
462
bridges/CssSelectorComplexBridge.php
Normal file
@ -0,0 +1,462 @@
|
||||
<?php
|
||||
|
||||
class CssSelectorComplexBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'Lars Stegman';
|
||||
const NAME = 'CSS Selector Complex Bridge';
|
||||
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
|
||||
const DESCRIPTION = <<<EOT
|
||||
Convert any site to RSS feed using CSS selectors (Advanced Users). The bridge first selects
|
||||
the element describing the article entries. It then extracts the links to the articles from
|
||||
these elements. It then, depending on the setting "Load article from page", either parses
|
||||
the selected elements, or downloads the page for each article and parses those. Parsing the
|
||||
elements or page is done using the provided selectors.
|
||||
EOT;
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'home_page' => [
|
||||
'name' => 'Site URL: Page with latest articles',
|
||||
'exampleValue' => 'https://example.com/blog/',
|
||||
'required' => true
|
||||
],
|
||||
'cookie' => [
|
||||
'name' => '[Optional] Cookie',
|
||||
'title' => <<<EOT
|
||||
Use when the website does not send the page contents, unless a static cookie is included.
|
||||
EOT,
|
||||
'exampleValue' => 'sessionId=deadb33f'
|
||||
],
|
||||
'title_cleanup' => [
|
||||
'name' => '[Optional] Text to remove from feed title',
|
||||
'title' => <<<EOT
|
||||
Text to remove from the feed title, which is read from the article list page.
|
||||
EOT,
|
||||
'exampleValue' => ' | BlogName',
|
||||
],
|
||||
'entry_element_selector' => [
|
||||
'name' => 'Selector for article entry elements',
|
||||
'title' => <<<EOT
|
||||
This bridge works using CSS selectors, e.g. "div.article" will match all
|
||||
<div class="article">...</div> on home page, each one being treated as a feed item.
|
||||
|
||||
Use the URL selector option to select the `a` element with the
|
||||
`href` to the article link. If this option is not configured, the first encountered
|
||||
`a` element is used.
|
||||
EOT,
|
||||
'exampleValue' => 'div.article',
|
||||
'required' => true
|
||||
],
|
||||
'url_selector' => [
|
||||
'name' => '[Optional] Selector for link elements',
|
||||
'title' => <<<EOT
|
||||
The selector to find `a` elements in the entry element. If empty,
|
||||
the first encountered `a` element is used. The `href` property
|
||||
is used to create entries in the feed.
|
||||
EOT,
|
||||
'exampleValue' => 'a.article',
|
||||
'defaultValue' => 'a'
|
||||
],
|
||||
'url_pattern' => [
|
||||
'name' => '[Optional] Pattern for site URLs to keep in feed',
|
||||
'title' => 'Optionally filter items by applying a regular expression on their URL',
|
||||
'exampleValue' => '/blog/article/.*',
|
||||
],
|
||||
'limit' => self::LIMIT,
|
||||
'use_article_pages' => [
|
||||
'name' => 'Load article from page',
|
||||
'title' => <<<EOT
|
||||
If true, the article page is load and parsed to get the article contents using
|
||||
the css selectors. (Slower!)
|
||||
Otherwise, the element selected by the article entry selector is used.
|
||||
EOT,
|
||||
'type' => 'checkbox'
|
||||
],
|
||||
'article_page_content_selector' => [
|
||||
'name' => '[Optional] Selector to select article element',
|
||||
'title' => 'Extract the article from its page using the provided selector',
|
||||
'exampleValue' => 'article.content',
|
||||
],
|
||||
'content_cleanup' => [
|
||||
'name' => '[Optional] Content cleanup: selector for items to remove',
|
||||
'title' => 'Selector for unnecessary elements to remove inside article contents.',
|
||||
'exampleValue' => 'div.ads, div.comments',
|
||||
],
|
||||
'title_selector' => [
|
||||
'name' => '[Optional] Selector for the article title',
|
||||
'title' => 'Selector to select the article title',
|
||||
'defaultValue' => 'h1'
|
||||
],
|
||||
'category_selector' => [
|
||||
'name' => '[Optional] Categories',
|
||||
'title' => <<<EOT
|
||||
Selector to extract the catgories the article has
|
||||
EOT,
|
||||
'exampleValue' => 'span.category, #main-category'
|
||||
],
|
||||
'author_selector' => [
|
||||
'name' => '[Optional] Author',
|
||||
'title' => <<<EOT
|
||||
Selector to extract the author of the article. If multiple elements are selected
|
||||
the first one is used.
|
||||
EOT,
|
||||
'exampleValue' => 'span#author'
|
||||
],
|
||||
'time_selector' => [
|
||||
'name' => '[Optional] Time selector',
|
||||
'title' => <<<EOT
|
||||
Selector to extract the timestamp of the article. If the element
|
||||
is an html5 `time` element, the value for the `datetime` attribute is used.
|
||||
EOT,
|
||||
],
|
||||
'time_format' => [
|
||||
'name' => '[Optional] Format string for parsing time',
|
||||
'title' => <<<EOT
|
||||
The format to use to parse the timestamp. See
|
||||
https://www.php.net/manual/en/datetimeimmutable.createfromformat.php
|
||||
for the format specification.
|
||||
EOT
|
||||
],
|
||||
'remove_styling' => [
|
||||
'name' => '[Optional] Remove styling',
|
||||
'title' => 'Remove class and style attributes from the page elements',
|
||||
'type' => 'checkbox'
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
private $feedName = '';
|
||||
|
||||
/**
 * URI advertised for the generated feed.
 *
 * @return string The `home_page` input when it is set, otherwise the parent's URI.
 */
public function getURI()
{
    $home_page = $this->getInput('home_page');

    return empty($home_page) ? parent::getURI() : $home_page;
}
|
||||
|
||||
/**
 * Name advertised for the generated feed.
 *
 * @return string The feed name stored in `$this->feedName` when it is non-empty,
 *                otherwise the parent's name.
 */
public function getName()
{
    return empty($this->feedName) ? parent::getName() : $this->feedName;
}
|
||||
|
||||
/**
 * Build the extra HTTP request headers derived from the bridge inputs.
 *
 * @return array List with a single `Cookie: ...` header when the `cookie` input is
 *               set, otherwise an empty list.
 */
protected function getHeaders()
{
    $cookie = $this->getInput('cookie');

    return empty($cookie) ? [] : ['Cookie: ' . $cookie];
}
|
||||
|
||||
/**
 * Collect the feed items.
 *
 * Loads the listing page, stores its cleaned-up title as the feed name, locates
 * the entry elements, optionally swaps each entry element for the article
 * element fetched from the linked page, and finally parses every element into
 * a feed item.
 */
public function collectData()
{
    $home_page = $this->getInput('home_page');
    $request_headers = $this->getHeaders();

    // Inputs used to discover entries on the listing page.
    $entry_selector = $this->getInput('entry_element_selector');
    $link_selector = $this->getInput('url_selector');
    $link_pattern = $this->getInput('url_pattern');
    $max_items = $this->getInput('limit') ?? 10;

    // Inputs used to parse each entry.
    $load_article_pages = $this->getInput('use_article_pages');
    $page_content_selector = $this->getInput('article_page_content_selector');
    $cleanup_selector = $this->getInput('content_cleanup');
    $title_selector = $this->getInput('title_selector');
    $title_cleanup = $this->getInput('title_cleanup');
    $time_selector = $this->getInput('time_selector');
    $time_format = $this->getInput('time_format');
    $category_selector = $this->getInput('category_selector');
    $author_selector = $this->getInput('author_selector');
    $strip_styling = $this->getInput('remove_styling');

    $listing = defaultLinkTo(getSimpleHTMLDOM($home_page, $request_headers), $home_page);
    $this->feedName = $this->getTitle($listing, $title_cleanup);
    $entries = $this->htmlFindEntryElements($listing, $entry_selector, $link_selector, $link_pattern, $max_items);

    if (empty($entries)) {
        return;
    }

    // Fetch the elements from the article pages.
    if ($load_article_pages) {
        if (empty($page_content_selector)) {
            returnClientError('`Article selector` is required when `Load article page` is enabled');
        }

        foreach (array_keys($entries) as $entry_uri) {
            $entries[$entry_uri] = $this->fetchArticleElementFromPage($entry_uri, $page_content_selector);
        }
    }

    foreach ($entries as $entry_uri => $entry_element) {
        $item = $this->parseEntryElement(
            $entry_element,
            $title_selector,
            $author_selector,
            $category_selector,
            $time_selector,
            $time_format,
            $cleanup_selector,
            $this->feedName,
            $strip_styling
        );

        $item['uri'] = $entry_uri;
        $this->items[] = $item;
    }
}
|
||||
|
||||
/**
 * Keep only the URLs matching a pattern and cap the amount of results.
 *
 * The pattern is wrapped in `/` delimiters (slashes inside it are escaped) and
 * applied as a regular expression. Matching URLs keep their original array
 * keys; when the list is truncated to the limit it is re-indexed from 0.
 *
 * @param array $links List of URLs
 * @param string $url_pattern Pattern to look for in URLs; empty to keep every URL
 * @param int $limit Optional maximum amount of URLs to return; 0 or less for no limit
 * @return array Array of URLs
 */
protected function filterUrlList($links, $url_pattern, $limit = 0)
{
    if (!empty($url_pattern)) {
        $regex = '/' . str_replace('/', '\/', $url_pattern) . '/';
        $matching = [];
        foreach ($links as $key => $link) {
            if (preg_match($regex, $link) === 1) {
                $matching[$key] = $link;
            }
        }
        $links = $matching;
    }

    if ($limit > 0 && count($links) > $limit) {
        return array_slice($links, 0, $limit);
    }

    return $links;
}
|
||||
|
||||
/**
 * Retrieve title from webpage URL or DOM
 *
 * When a URL is given, the page is downloaded through the cache using the
 * bridge's request headers. A page without a `<title>` element yields an empty
 * string instead of failing.
 *
 * @param string|object $page URL or DOM to retrieve title from
 * @param string|null $title_cleanup optional string to remove from webpage title, e.g. " | BlogName"
 * @return string Webpage title, entity-decoded and trimmed; '' when the page has no title
 */
protected function getTitle($page, $title_cleanup)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOMCached($page, 86400, $this->getHeaders());
    }

    $title_element = $page->find('title', 0);
    if (!$title_element) {
        // No <title> element: callers treat an empty title as "unknown".
        return '';
    }

    $title = html_entity_decode($title_element->plaintext);
    if (!empty($title)) {
        // The cleanup input is optional and may be null; cast it so str_replace
        // always receives a string (an empty search string leaves the title as is).
        $title = trim(str_replace((string) $title_cleanup, '', $title));
    }

    return $title;
}
|
||||
|
||||
/**
 * Strip unwanted elements and, optionally, styling attributes from HTML content.
 *
 * @param string|object $content HTML content as HTML object or string
 * @param string $cleanup_selector Selector for the elements to blank out; empty to keep everything
 * @param bool $remove_styling Whether to remove the `class` and `style` attributes
 * @return string|object Cleaned content (same type as input)
 */
protected function cleanArticleContent($content, $cleanup_selector, $remove_styling)
{
    // Strings are parsed for processing and serialised again before returning.
    $was_string = is_string($content);
    $dom = $was_string ? str_get_html($content) : $content;

    if (!empty($cleanup_selector)) {
        foreach ($dom->find($cleanup_selector) as $unwanted) {
            $unwanted->outertext = '';
        }
    }

    if ($remove_styling) {
        foreach (['class', 'style'] as $attribute) {
            $selector = '[' . $attribute . ']';
            foreach ($dom->find($selector) as $styled) {
                $styled->removeAttribute($attribute);
            }
        }
    }

    return $was_string ? $dom->outertext : $dom;
}
|
||||
|
||||
|
||||
/**
 * Retrieve first N link+element from webpage URL or DOM satisfying the specified criteria
 *
 * Each entry element is paired with the `href` of the first element matching
 * the URL selector inside it; an entry that is itself an `a` element is used
 * as its own link. Entries without a usable link are ignored. When several
 * entries share the same link, the last one wins.
 *
 * @param string|object $page URL or DOM to retrieve feed items from
 * @param string $entry_selector DOM selector for matching HTML elements that contain article
 *                               entries
 * @param string $url_selector DOM selector for matching links; the first `a` element is
 *                             used when empty
 * @param string $url_pattern Optional filter to keep only links matching the pattern
 * @param int $limit Optional maximum amount of URLs to return
 * @return array of items { <uri> => <html-element> }
 */
protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0)
{
    if (is_string($page)) {
        $page = getSimpleHTMLDOM($page, $this->getHeaders());
    }

    $entryElements = $page->find($entry_selector);
    if (empty($entryElements)) {
        returnClientError('No entry elements for entry selector');
    }

    // The parameter help promises the first `a` element when no selector is given.
    if (empty($url_selector)) {
        $url_selector = 'a';
    }

    // Extract URIs with the associated entry element
    $links_with_elements = [];
    foreach ($entryElements as $entry) {
        $url_element = $entry->find($url_selector, 0);
        if (is_null($url_element)) {
            // No link element found inside this entry: the entry itself may be the link
            if ($entry->tag == 'a') {
                $url_element = $entry;
            } else {
                continue;
            }
        }

        // An element without href cannot identify an article; using it as an
        // array key would register the entry under a bogus key.
        $href = $url_element->href;
        if (empty($href)) {
            continue;
        }

        $links_with_elements[$href] = $entry;
    }

    if (empty($links_with_elements)) {
        returnClientError('The provided URL selector matches some elements, but they do not contain links.');
    }

    // Filter using the URL pattern
    $filtered_urls = $this->filterUrlList(array_keys($links_with_elements), $url_pattern, $limit);

    if (empty($filtered_urls)) {
        returnClientError('No results for URL pattern');
    }

    $items = [];
    foreach ($filtered_urls as $link) {
        $items[$link] = $links_with_elements[$link];
    }

    return $items;
}
|
||||
|
||||
|
||||
/**
 * Download an article page (through the cache) and return its article element.
 *
 * Reports a client error when nothing on the page matches the selector.
 *
 * @param string $entry_url URL to retrieve article from
 * @param string $content_selector HTML selector for extracting content, e.g. "article.content"
 * @return object Article DOM element, passed through defaultLinkTo() with the article URL
 */
protected function fetchArticleElementFromPage($entry_url, $content_selector)
{
    $page = getSimpleHTMLDOMCached($entry_url, 86400, $this->getHeaders());
    $article = $page->find($content_selector, 0);

    if ($article === null) {
        returnClientError('Could not get article content at URL: ' . $entry_url);
    }

    return defaultLinkTo($article, $entry_url);
}
|
||||
|
||||
/**
 * Parse a time string with an explicit format into a Unix timestamp.
 *
 * A timezone or UTC offset contained in the string is honoured; without one,
 * the string is interpreted in PHP's default timezone. Reports a client error
 * when the string does not match the format.
 *
 * @param string $timeStr The time string to parse
 * @param string $format Format as accepted by DateTimeImmutable::createFromFormat()
 * @return int Unix timestamp
 */
protected function parseTimeStrAsTimestamp($timeStr, $format)
{
    // The leading '!' resets every field the format does not mention, so a
    // date-only format yields midnight rather than the current time of day.
    $date = DateTimeImmutable::createFromFormat('!' . $format, (string) $timeStr);

    if ($date === false) {
        returnClientError('Error while parsing time string');
    }

    return $date->getTimestamp();
}
|
||||
|
||||
/**
 * Build a feed item from the DOM element holding an article.
 *
 * Elements that a selector fails to match are treated as missing data (empty
 * title falls back to the default title, author/timestamp stay null) instead
 * of aborting the whole feed.
 *
 * @param object $entry_html A DOM element containing the article
 * @param string $title_selector A selector to the article title from the article
 * @param string $author_selector A selector to find the article author
 * @param string $category_selector A selector matching the article's category elements
 * @param string $time_selector A selector to get the article publication time.
 * @param string $time_format The format to parse the time_selector.
 * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads,
 *                                div.comments"
 * @param string $title_default Optional title to use when could not extract title reliably
 * @param bool $remove_styling Whether to remove class and style attributes from the HTML
 * @return array Entry data: title, content, categories, timestamp, author
 */
protected function parseEntryElement(
    $entry_html,
    $title_selector = null,
    $author_selector = null,
    $category_selector = null,
    $time_selector = null,
    $time_format = null,
    $content_cleanup = null,
    $title_default = null,
    $remove_styling = false
) {
    $article_content = convertLazyLoading($entry_html);

    $article_title = '';
    if (is_null($title_selector)) {
        $article_title = $title_default;
    } else {
        $title_element = $entry_html->find($title_selector, 0);
        if ($title_element) {
            $article_title = trim($title_element->innertext);
        }
        // Selector matched nothing (or an empty element): use the default title.
        if ($article_title === '' && !empty($title_default)) {
            $article_title = $title_default;
        }
    }

    $author = null;
    if (!is_null($author_selector) && $author_selector != '') {
        $author_element = $entry_html->find($author_selector, 0);
        if ($author_element) {
            $author = trim($author_element->innertext);
        }
    }

    $categories = [];
    if (!is_null($category_selector) && $category_selector != '') {
        foreach ($entry_html->find($category_selector) as $category_element) {
            $categories[] = trim($category_element->innertext);
        }
    }

    $time = null;
    if (!is_null($time_selector) && $time_selector != '') {
        $time_element = $entry_html->find($time_selector, 0);
        if ($time_element) {
            // Prefer the machine-readable `datetime` attribute over the visible text.
            $time = $time_element->getAttribute('datetime');
            if (empty($time)) {
                $time = $time_element->innertext;
            }

            $time = $this->parseTimeStrAsTimestamp($time, $time_format);
        }
    }

    $article_content = $this->cleanArticleContent($article_content, $content_cleanup, $remove_styling);

    return [
        'title' => $article_title,
        'content' => $article_content,
        'categories' => $categories,
        'timestamp' => $time,
        'author' => $author,
    ];
}
|
||||
}
|
119
bridges/CssSelectorFeedExpanderBridge.php
Normal file
119
bridges/CssSelectorFeedExpanderBridge.php
Normal file
@ -0,0 +1,119 @@
|
||||
<?php
|
||||
|
||||
class CssSelectorFeedExpanderBridge extends CssSelectorBridge
|
||||
{
|
||||
const MAINTAINER = 'ORelio';
|
||||
const NAME = 'CSS Selector Feed Expander';
|
||||
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
|
||||
const DESCRIPTION = 'Expand any site RSS feed using CSS selectors (Advanced Users)';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'feed' => [
|
||||
'name' => 'Feed: URL of truncated RSS feed',
|
||||
'exampleValue' => 'https://example.com/feed.xml',
|
||||
'required' => true
|
||||
],
|
||||
'content_selector' => [
|
||||
'name' => 'Selector for each article content',
|
||||
'title' => <<<EOT
|
||||
This bridge works using CSS selectors, e.g. "div.article" will match <div class="article">.
|
||||
Everything inside that element becomes feed item content.
|
||||
EOT,
|
||||
'exampleValue' => 'article.content',
|
||||
'required' => true
|
||||
],
|
||||
'content_cleanup' => [
|
||||
'name' => '[Optional] Content cleanup: List of items to remove',
|
||||
'title' => 'Selector for unnecessary elements to remove inside article contents.',
|
||||
'exampleValue' => 'div.ads, div.comments',
|
||||
],
|
||||
'dont_expand_metadata' => [
|
||||
'name' => '[Optional] Don\'t expand metadata',
|
||||
'title' => "This bridge will attempt to fill missing fields using metadata from the webpage.\nCheck to disable.",
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'discard_thumbnail' => [
|
||||
'name' => '[Optional] Discard thumbnail set by site author',
|
||||
'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'thumbnail_as_header' => [
|
||||
'name' => '[Optional] Insert thumbnail as article header',
|
||||
'title' => 'Insert article main image on top of article contents.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Collect the feed items.
 *
 * Parses the source feed, derives homepage URL and feed name from it, applies
 * the item limit, then expands every item with the content extracted from its
 * article page.
 */
public function collectData()
{
    $feed_url = $this->getInput('feed');
    $content_selector = $this->getInput('content_selector');
    $content_cleanup = $this->getInput('content_cleanup');
    $keep_feed_metadata_only = $this->getInput('dont_expand_metadata');
    $discard_thumbnail = $this->getInput('discard_thumbnail');
    $thumbnail_as_header = $this->getInput('thumbnail_as_header');
    $limit = $this->getInput('limit');

    $parser = new FeedParser();
    $source_feed = $parser->parseFeed(getContents($feed_url));
    $feed_items = $source_feed['items'];

    // Map Homepage URL (Default: Root page)
    $this->homepageUrl = $source_feed['uri'] ?? urljoin($feed_url, '/');

    // Map Feed Name (Default: Domain name)
    $this->feedName = $source_feed['title'] ?? explode('/', urljoin($feed_url, '/'))[2];

    // Apply item limit (Default: Global limit)
    if ($limit > 0) {
        $feed_items = array_slice($feed_items, 0, $limit);
    }

    // Expand feed items (CssSelectorBridge)
    foreach ($feed_items as $feed_item) {
        $expanded = $this->expandEntryWithSelector(
            $feed_item['uri'],
            $content_selector,
            $content_cleanup
        );

        if ($keep_feed_metadata_only) {
            // Take feed item, only replace content from expanded data
            $expanded_content = $expanded['content'];
            $expanded = $feed_item;
            $expanded['content'] = $expanded_content;
        } else {
            // Take expanded item, but give priority to metadata already in source item
            foreach ($feed_item as $field => $value) {
                if ($field === 'content' || empty($value)) {
                    continue;
                }
                $expanded[$field] = $value;
            }
        }

        if ($discard_thumbnail) {
            // Unsetting a missing key is a no-op, so no isset() guard is needed.
            unset($expanded['enclosures']);
        }

        if ($thumbnail_as_header && isset($expanded['enclosures'][0])) {
            $header = '<p><img src="' . $expanded['enclosures'][0] . '" /></p>';
            $expanded['content'] = $header . $expanded['content'];
        }

        $this->items[] = $expanded;
    }
}
|
||||
}
|
@ -47,8 +47,10 @@ class CubariBridge extends BridgeAbstract
|
||||
*/
|
||||
public function collectData()
|
||||
{
|
||||
$jsonSite = getContents($this->getInput('gist'));
|
||||
$jsonFile = json_decode($jsonSite, true);
|
||||
// TODO: fix trivial SSRF
|
||||
$json = getContents($this->getInput('gist'));
|
||||
|
||||
$jsonFile = Json::decode($json);
|
||||
|
||||
$this->mangaTitle = $jsonFile['title'];
|
||||
|
||||
@ -66,12 +68,14 @@ class CubariBridge extends BridgeAbstract
|
||||
{
|
||||
$url = $this->getInput('gist');
|
||||
|
||||
preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches);
|
||||
|
||||
// raw or gist is first match.
|
||||
$unencoded = $matches[1] . $matches[2];
|
||||
|
||||
return base64_encode($unencoded);
|
||||
if (preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches)) {
|
||||
// raw or gist is first match.
|
||||
$unencoded = $matches[1] . $matches[2];
|
||||
return base64_encode($unencoded);
|
||||
} else {
|
||||
// todo: fix this
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
private function getSanitizedHash($string)
|
||||
|
129
bridges/CubariProxyBridge.php
Normal file
129
bridges/CubariProxyBridge.php
Normal file
@ -0,0 +1,129 @@
|
||||
<?php
|
||||
|
||||
class CubariProxyBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Cubari Proxy';
|
||||
const MAINTAINER = 'phantop';
|
||||
const URI = 'https://cubari.moe';
|
||||
const DESCRIPTION = 'Returns chapters from Cubari.';
|
||||
const PARAMETERS = [[
|
||||
'service' => [
|
||||
'name' => 'Content service',
|
||||
'type' => 'list',
|
||||
'defaultValue' => 'mangadex',
|
||||
'values' => [
|
||||
'MangAventure' => 'mangadventure',
|
||||
'MangaDex' => 'mangadex',
|
||||
'MangaKatana' => 'mangakatana',
|
||||
'MangaSee' => 'mangasee',
|
||||
]
|
||||
],
|
||||
'series' => [
|
||||
'name' => 'Series ID/Name',
|
||||
'exampleValue' => '8c1d7d0c-e0b7-4170-941d-29f652c3c19d', # KnH
|
||||
'required' => true,
|
||||
],
|
||||
'fetch' => [
|
||||
'name' => 'Fetch chapter page images',
|
||||
'type' => 'list',
|
||||
'title' => 'Places chapter images in feed contents. Entries will consume more bandwidth.',
|
||||
'defaultValue' => 'c',
|
||||
'values' => [
|
||||
'None' => 'n',
|
||||
'Content' => 'c',
|
||||
'Enclosure' => 'e'
|
||||
]
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]];
|
||||
|
||||
private $title;
|
||||
|
||||
/**
 * Collect the feed items: one item per chapter, newest chapter key first.
 *
 * Depending on the `fetch` input, the page images of each chapter are
 * downloaded and placed in the item content ('c') or enclosures ('e').
 */
public function collectData()
{
    $limit = $this->getInput('limit') ?? 10;
    $fetch = $this->getInput('fetch');

    $url = parent::getURI() . '/read/api/' . $this->getInput('service') . '/series/' . $this->getInput('series');
    $json = Json::decode(getContents($url));
    $this->title = $json['title'];

    $chapters = $json['chapters'];
    krsort($chapters);

    $count = 0;
    foreach ($chapters as $number => $element) {
        // Stop before doing any work for a chapter that would be discarded;
        // otherwise one extra chapter's page list is downloaded for nothing.
        if ($count == $limit) {
            break;
        }

        $item = [];
        $item['uri'] = $this->getURI() . '/' . $number;

        if (!empty($element['title'])) {
            $item['title'] = $number . ' - ' . $element['title'];
        } else {
            $item['title'] = 'Volume ' . $element['volume'] . ' Chapter ' . $number;
        }

        // Pick the group with the most recent release date; default to group '1'.
        $group = '1';
        if (isset($element['release_date'])) {
            $dates = $element['release_date'];
            $date = max($dates);
            $item['timestamp'] = $date;
            $group = array_keys($dates, $date)[0];
        }
        $page = $element['groups'][$group];
        $item['author'] = $json['groups'][$group];
        $api = parent::getURI() . $page;
        $item['uid'] = $page;
        $item['comments'] = $api;

        if ($fetch != 'n') {
            $pages = [];
            try {
                $jsonp = getContents($api);
                $pages = Json::decode($jsonp);
            } catch (HttpException $e) {
                // allow error 500, as it's effectively a 429
                if ($e->getCode() != 500) {
                    throw $e;
                }
            }
            if ($fetch == 'e') {
                $item['enclosures'] = $pages;
            }
            if ($fetch == 'c') {
                $item['content'] = '';
                foreach ($pages as $img) {
                    $item['content'] .= '<img src="' . $img . '"/>';
                }
            }
        }

        $this->items[] = $item;
        $count++;
    }
}
|
||||
|
||||
/**
 * Name advertised for the generated feed.
 *
 * @return string The parent's name, suffixed with " - <series title>" once the title is set.
 */
public function getName()
{
    $base_name = parent::getName();

    return isset($this->title) ? $base_name . ' - ' . $this->title : $base_name;
}
|
||||
|
||||
/**
 * URI advertised for the generated feed.
 *
 * @return string The parent's URI, extended with "/read/<service>/<series>" when the
 *                `service` input is set.
 */
public function getURI()
{
    $base_uri = parent::getURI();

    if (!$this->getInput('service')) {
        return $base_uri;
    }

    return $base_uri . '/read/' . $this->getInput('service') . '/' . $this->getInput('series');
}
|
||||
|
||||
/**
 * @return string URL of the favicon, located under the parent's URI.
 */
public function getIcon()
{
    $icon_path = '/static/favicon.png';

    return parent::getURI() . $icon_path;
}
|
||||
}
|
107
bridges/DRKBlutspendeBridge.php
Normal file
107
bridges/DRKBlutspendeBridge.php
Normal file
@ -0,0 +1,107 @@
|
||||
<?php
|
||||
|
||||
class DRKBlutspendeBridge extends FeedExpander
|
||||
{
|
||||
const MAINTAINER = 'User123698745';
|
||||
const NAME = 'DRK-Blutspende';
|
||||
const BASE_URI = 'https://www.drk-blutspende.de';
|
||||
const URI = self::BASE_URI;
|
||||
const CACHE_TIMEOUT = 60 * 60 * 1; // 1 hour
|
||||
const DESCRIPTION = 'German Red Cross (Deutsches Rotes Kreuz) blood donation service feed with more details';
|
||||
const CONTEXT_APPOINTMENTS = 'Termine';
|
||||
const PARAMETERS = [
|
||||
self::CONTEXT_APPOINTMENTS => [
|
||||
'term' => [
|
||||
'name' => 'PLZ / Ort',
|
||||
'required' => true,
|
||||
'exampleValue' => '12555',
|
||||
],
|
||||
'radius' => [
|
||||
'name' => 'Umkreis in km',
|
||||
'type' => 'number',
|
||||
'exampleValue' => 10,
|
||||
],
|
||||
'limit_days' => [
|
||||
'name' => 'Limit von Tagen',
|
||||
'title' => 'Nur Termine innerhalb der nächsten x Tagen',
|
||||
'type' => 'number',
|
||||
'exampleValue' => 28,
|
||||
],
|
||||
'limit_items' => [
|
||||
'name' => 'Limit von Terminen',
|
||||
'title' => 'Nicht mehr als x Termine',
|
||||
'type' => 'number',
|
||||
'required' => true,
|
||||
'defaultValue' => 20,
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Collect the feed items by expanding the appointments RSS feed, capped at the
 * `limit_items` input.
 */
public function collectData()
{
    $maxItems = (int) $this->getInput('limit_items');
    $feedUri = $this->buildAppointmentsURI();

    $this->collectExpandableDatas($feedUri, $maxItems);
}
|
||||
|
||||
/**
 * Enrich a feed item with details scraped from the appointment page.
 *
 * Title and content are rebuilt from the date, address, info text and photos
 * found inside the `.details` element. When the page lacks the date or
 * address block, the item is returned as delivered by the feed.
 *
 * @param array $item Feed item; its `uri` is the appointment page to load
 * @return array The enriched item
 */
protected function parseItem(array $item)
{
    $html = getSimpleHTMLDOM($item['uri']);

    $detailsElement = $html->find('.details', 0);
    $dateElement = $detailsElement ? $detailsElement->find('.datum', 0) : null;
    $addressElement = $detailsElement ? $detailsElement->find('.adresse', 0) : null;

    if (!$dateElement || !$addressElement) {
        // Page layout differs from what this bridge expects: keep the feed's own data
        // rather than failing the whole feed.
        return $item;
    }

    // Pad to two entries so a single-line block does not trigger undefined-index reads.
    $dateLines = array_pad($this->explodeLines($dateElement->plaintext), 2, '');
    $addressLines = array_pad($this->explodeLines($addressElement->plaintext), 2, '');

    $infoElement = $detailsElement->find('.angebote > h4 + p', 0);
    $info = $infoElement ? $infoElement->innertext : '';

    $imageElements = $detailsElement->find('.fotos img');

    $item['title'] = $dateLines[0] . ' ' . $dateLines[1] . ' ' . $addressLines[0] . ' - ' . $addressLines[1];

    $item['content'] = <<<HTML
        <p><b>{$dateLines[0]} {$dateLines[1]}</b></p>
        <p>{$addressElement->innertext}</p>
        <p>{$info}</p>
        HTML;

    foreach ($imageElements as $imageElement) {
        $src = $imageElement->getAttribute('src');
        $item['content'] .= <<<HTML
            <p><img src="{$src}"></p>
            HTML;
    }

    $item['description'] = null;

    return $item;
}
|
||||
|
||||
/**
 * URI advertised for the generated feed.
 *
 * @return string For the appointments context, the appointments feed URI with the
 *                ".rss" suffix before the query string removed; otherwise the parent's URI.
 */
public function getURI()
{
    if ($this->queriedContext !== self::CONTEXT_APPOINTMENTS) {
        return parent::getURI();
    }

    $feedUri = $this->buildAppointmentsURI();

    return str_replace('.rss?', '?', $feedUri);
}
|
||||
|
||||
/**
 * Build the URI of the appointments RSS feed from the bridge inputs.
 *
 * Query values are URL-encoded so that search terms containing spaces or
 * non-ASCII characters (e.g. "Bad Homburg", "Köln") produce a valid URL.
 *
 * @return string Feed URI with the `date_to`, `radius` and `term` query parameters
 */
private function buildAppointmentsURI()
{
    $term = $this->getInput('term') ?? '';
    $radius = $this->getInput('radius') ?? '';
    $limitDays = intval($this->getInput('limit_days'));
    // Only restrict the date range when a positive amount of days is requested.
    $dateTo = $limitDays > 0 ? date('Y-m-d', time() + (60 * 60 * 24 * $limitDays)) : '';

    $query = http_build_query(
        [
            'date_to' => $dateTo,
            'radius' => $radius,
            'term' => $term,
        ],
        '',
        '&'
    );

    return self::BASE_URI . '/blutspendetermine/termine.rss?' . $query;
}
|
||||
|
||||
/**
 * Split a text on line breaks (any run of line breaks with surrounding
 * whitespace counts as one separator) and trim every resulting piece.
 *
 * @param string $text Text to split
 * @return array List of trimmed lines
 */
private function explodeLines(string $text): array
{
    $pieces = preg_split('/(\s*(\r\n|\n|\r)\s*)+/', $text);

    $lines = [];
    foreach ($pieces as $piece) {
        $lines[] = trim($piece);
    }

    return $lines;
}
|
||||
}
|
102
bridges/DacksnackBridge.php
Normal file
102
bridges/DacksnackBridge.php
Normal file
@ -0,0 +1,102 @@
|
||||
<?PHP
|
||||
|
||||
class DacksnackBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Däcksnack';
|
||||
const URI = 'https://www.tidningendacksnack.se';
|
||||
const DESCRIPTION = 'Latest news by the magazine Däcksnack';
|
||||
const MAINTAINER = 'ajain-93';
|
||||
|
||||
/**
 * @return string URL of the favicon, located under the bridge URI.
 */
public function getIcon()
{
    $icon_path = '/upload/favicon/2591047722.png';

    return self::URI . $icon_path;
}
|
||||
|
||||
/**
 * Convert a Swedish date such as "5 mars 2024" into a Unix timestamp.
 *
 * The month name is matched case-insensitively. The timestamp refers to 00:00
 * on that day in PHP's default timezone.
 *
 * @param string $dateString Date in "<day> <Swedish month name> <year>" form
 * @return int|false Unix timestamp, or false when the string is not a valid date
 */
private function parseSwedishDates($dateString)
{
    // Mapping of Swedish month names to month numbers
    $monthNames = [
        'januari' => 1,
        'februari' => 2,
        'mars' => 3,
        'april' => 4,
        'maj' => 5,
        'juni' => 6,
        'juli' => 7,
        'augusti' => 8,
        'september' => 9,
        'oktober' => 10,
        'november' => 11,
        'december' => 12,
    ];

    // Split on any whitespace run so stray double spaces do not shift the parts
    $parts = preg_split('/\s+/', trim((string) $dateString));
    if (count($parts) !== 3) {
        return false;
    }
    [$day, $monthName, $year] = $parts;

    // An unknown month must not silently turn into month 0 (which DateTime
    // would roll back into December of the previous year).
    $monthKey = strtolower($monthName);
    if (!isset($monthNames[$monthKey])) {
        return false;
    }
    $month = $monthNames[$monthKey];

    if (!checkdate($month, (int) $day, (int) $year)) {
        return false;
    }

    // Format to a string recognizable by DateTime
    $formattedDate = sprintf('%04d-%02d-%02d', $year, $month, $day);

    $dateValue = new DateTime($formattedDate);
    $dateValue->setTime(0, 0); // Set time to 00:00

    return $dateValue->getTimestamp();
}
|
||||
|
||||
/**
 * Collect the feed items.
 *
 * Every `a.main-news-item` on the front page becomes one item; the linked
 * article page is loaded (through the cache) to build the item content from
 * the preamble, main image with caption, and article text. Optional parts
 * that are missing from a page are left out; an article page without the
 * article container is skipped.
 */
public function collectData()
{
    $NEWSURL = self::URI;
    $html = getSimpleHTMLDOMCached($NEWSURL, 18000);

    // Plain text of an optional element, '' when the element is absent.
    $textOf = static function ($node) {
        return $node ? $node->plaintext : '';
    };

    foreach ($html->find('a.main-news-item') as $element) {
        $title = trim($textOf($element->find('h2', 0)));
        $category = trim($textOf($element->find('.category-tag', 0)));
        $url = self::URI . $element->getAttribute('href');

        $publishedElement = $element->find('.published', 0);
        $published = $publishedElement
            ? $this->parseSwedishDates(trim($publishedElement->plaintext))
            : null;

        $article_html = getSimpleHTMLDOMCached($url, 18000);
        $article_content = $article_html->find('#ctl00_ContentPlaceHolder1_NewsArticleVeiw_pnlArticle', 0);
        if (!$article_content) {
            continue;
        }

        $image = $article_content->find('img.news-image', 0);
        $figure_caption = $textOf($article_content->find('.image-description', 0));
        $author = $textOf($article_content->find('span.main-article-author', 0));
        $preamble = $textOf($article_content->find('h4.main-article-ingress', 0));

        // The article text is taken from the last <div> without a class attribute.
        $article_text = '';
        foreach ($article_content->find('div') as $div) {
            if (!$div->hasAttribute('class')) {
                $article_text = $div;
            }
        }

        // Use a regular expression to extract the name
        if (preg_match('/Text:\s*(.*?)\s*Foto:/', $author, $matches)) {
            $author = $matches[1]; // This will contain 'Jonna Jansson'
        }

        $content = '<b> [' . $category . '] <i>' . $preamble . '</i></b><br/><br/>';
        if ($image) {
            $figure = self::URI . $image->getAttribute('src');
            $content .= '<figure>';
            // Quote the attribute so URLs containing spaces do not break the markup.
            $content .= '<img src="' . htmlspecialchars($figure, ENT_QUOTES) . '">';
            $content .= '<figcaption>' . $figure_caption . '</figcaption>';
            $content .= '</figure>';
        }
        $content .= $article_text;

        $this->items[] = [
            'uri' => $url,
            'title' => $title,
            'author' => $author,
            'timestamp' => $published,
            'content' => trim($content),
        ];
    }
}
|
||||
}
|
56
bridges/DagensNyheterDirektBridge.php
Normal file
56
bridges/DagensNyheterDirektBridge.php
Normal file
@ -0,0 +1,56 @@
|
||||
<?PHP
|
||||
|
||||
/**
 * Bridge for the "Direkt" live news stream of Dagens Nyheter.
 *
 * The posts are read from the `/ajax/direkt/` endpoint below BASEURL and
 * at most LIMIT of them are returned.
 */
class DagensNyheterDirektBridge extends BridgeAbstract
{
    const NAME = 'Dagens Nyheter Direkt';
    const URI = 'https://www.dn.se/direkt/';
    const BASEURL = 'https://www.dn.se';
    const DESCRIPTION = 'Latest news summarised by Dagens Nyheter';
    const MAINTAINER = 'ajain-93';
    const LIMIT = 20;

    /**
     * @return string URL of the icon shown for this bridge
     */
    public function getIcon()
    {
        return 'https://cdn.dn-static.se/images/favicon__c2dd3284b46ffdf4d520536e526065fa8.svg';
    }

    /**
     * Turn every <article> of the live stream into a feed item.
     *
     * Optional parts of a post (link button, byline, figure, content
     * container) are treated as empty when they are absent.
     */
    public function collectData()
    {
        $NEWSURL = self::BASEURL . '/ajax/direkt/';

        $html = getSimpleHTMLDOM($NEWSURL);

        foreach ($html->find('article') as $element) {
            // Checked before appending so that exactly LIMIT items are
            // returned (checking "> LIMIT" after the append yields LIMIT + 1).
            if (count($this->items) >= self::LIMIT) {
                break;
            }

            $button = $element->find('button', 0);
            $link = $button ? $button->getAttribute('data-link') : '';
            $datetime = $element->getAttribute('data-publication-time');
            $url = self::BASEURL . $link;

            $titleNode = $element->find('h2', 0);
            $title = $titleNode ? $titleNode->plaintext : '';

            $authorNode = $element->find('div.ds-byline__titles', 0);
            $author = $authorNode ? $authorNode->plaintext : '';

            $article_content = $element->find('div.direkt-post__content', 0);
            $article_html = '';

            // Lead with the image and its caption when the post has a figure
            $figure = $element->find('figure', 0);
            if ($figure) {
                $article_html = $figure->find('img', 0) . '<p><i>' . $figure->find('figcaption', 0) . '</i></p>';
            }

            // Only the paragraphs of the content container are kept
            if ($article_content) {
                foreach ($article_content->find('p') as $p) {
                    $article_html .= $p;
                }
            }

            $this->items[] = [
                'uri' => $url,
                'title' => $title,
                'author' => trim($author),
                'timestamp' => $datetime,
                'content' => trim($article_html),
            ];
        }
    }
}
|
96
bridges/DailythanthiBridge.php
Normal file
96
bridges/DailythanthiBridge.php
Normal file
@ -0,0 +1,96 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge for dailythanthi.com: lists the articles of one topic page and
 * fetches the full text of every listed article.
 */
class DailythanthiBridge extends BridgeAbstract
{
    const NAME = 'Dailythanthi';
    const URI = 'https://www.dailythanthi.com';
    const DESCRIPTION = 'Retrieve news from dailythanthi.com';
    const MAINTAINER = 'tillcash';
    const PARAMETERS = [
        [
            'topic' => [
                'name' => 'topic',
                'type' => 'list',
                'values' => [
                    'news' => [
                        'tamilnadu' => 'news/state',
                        'india' => 'news/india',
                        'world' => 'news/world',
                        'sirappu-katturaigal' => 'news/sirappukatturaigal',
                    ],
                    'cinema' => [
                        'news' => 'cinema/cinemanews',
                    ],
                    'sports' => [
                        'sports' => 'sports',
                        'cricket' => 'sports/cricket',
                        'football' => 'sports/football',
                        'tennis' => 'sports/tennis',
                        'hockey' => 'sports/hockey',
                        'other-sports' => 'sports/othersports',
                    ],
                    'devotional' => [
                        'devotional' => 'others/devotional',
                        'aalaya-varalaru' => 'aalaya-varalaru',
                    ],
                ],
            ],
        ],
    ];

    /**
     * @return string The bridge name, suffixed with the selected topic key when one is set
     */
    public function getName()
    {
        $selected = $this->getKey('topic');
        if (!$selected) {
            return self::NAME;
        }

        return self::NAME . ' - ' . ucfirst($selected);
    }

    /**
     * Read the listing page of the chosen topic and add one item per teaser.
     * Teasers lacking either the second link or a heading are skipped.
     */
    public function collectData()
    {
        $listing = getSimpleHTMLDOM(self::URI . '/' . $this->getInput('topic'));

        foreach ($listing->find('div.ListingNewsWithMEDImage') as $teaser) {
            $link = $teaser->find('a', 1);
            $heading = $teaser->find('h3', 0);

            if ($link && $heading) {
                $articleUrl = self::URI . $link->href;

                // The publication date is read from the second <span> of the teaser
                $dateNode = $teaser->find('span', 1);
                $dateString = '';
                if ($dateNode) {
                    $dateString = $dateNode->{'data-datestring'};
                }

                $entry = [];
                $entry['content'] = $this->constructContent($articleUrl);
                $entry['timestamp'] = $dateString ? $dateString . 'UTC' : '';
                $entry['title'] = $heading->plaintext;
                $entry['uid'] = $link->href;
                $entry['uri'] = $articleUrl;

                $this->items[] = $entry;
            }
        }
    }

    /**
     * Fetch an article page (cached) and build the item content from it.
     *
     * @param string $url Address of the article page
     * @return string The lead image followed by the cleaned story markup,
     *                or 'Content Not Found' when the story container is missing
     */
    private function constructContent($url)
    {
        $page = getSimpleHTMLDOMCached($url);
        $story = $page->find('div.details-content-story', 0);

        if ($story) {
            // Drop every div whose id contains "_ad"
            foreach ($story->find('div[id*="_ad"]') as $adBlock) {
                $adBlock->outertext = '';
            }

            // Swap the parent of each <h-img> element for a regular <img> paragraph
            foreach ($story->find('h-img') as $customImage) {
                $customImage->parent->outertext = '<p><img src="' . $customImage->src . '"></p>';
            }

            $leadImage = $page->find('div.main-image-caption-container img', 0);
            $leadMarkup = '';
            if ($leadImage) {
                $leadMarkup = '<p>' . $leadImage->outertext . '</p>';
            }

            return $leadMarkup . $story;
        }

        return 'Content Not Found';
    }
}
|
@ -10,9 +10,11 @@ class DansTonChatBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI . 'latest.html');
|
||||
$url = self::URI . 'latest.html';
|
||||
$dom = getSimpleHTMLDOM($url);
|
||||
|
||||
foreach ($html->find('div.item') as $element) {
|
||||
$items = $dom->find('div.item');
|
||||
foreach ($items as $element) {
|
||||
$item = [];
|
||||
$item['uri'] = $element->find('a', 0)->href;
|
||||
$titleContent = $element->find('h3 a', 0);
|
||||
|
@ -9,7 +9,7 @@ class DarkReadingBridge extends FeedExpander
|
||||
|
||||
const PARAMETERS = [ [
|
||||
'feed' => [
|
||||
'name' => 'Feed',
|
||||
'name' => 'Feed (NOT IN USE)',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'All Dark Reading Stories' => '000_AllArticles',
|
||||
@ -41,24 +41,13 @@ class DarkReadingBridge extends FeedExpander
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$feed = $this->getInput('feed');
|
||||
$feed_splitted = explode('_', $feed);
|
||||
$feed_id = $feed_splitted[0];
|
||||
$feed_name = $feed_splitted[1];
|
||||
if (empty($feed) || !ctype_digit($feed_id) || !preg_match('/[A-Za-z%20\/]/', $feed_name)) {
|
||||
returnClientError('Invalid feed, please check the "feed" parameter.');
|
||||
}
|
||||
$feed_url = $this->getURI() . 'rss_simple.asp';
|
||||
if ($feed_id != '000') {
|
||||
$feed_url .= '?f_n=' . $feed_id . '&f_ln=' . $feed_name;
|
||||
}
|
||||
$feed_url = 'https://www.darkreading.com/rss.xml';
|
||||
$limit = $this->getInput('limit') ?? 10;
|
||||
$this->collectExpandableDatas($feed_url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$item['enclosures'] = []; //remove author profile picture
|
||||
@ -72,7 +61,7 @@ class DarkReadingBridge extends FeedExpander
|
||||
|
||||
private function extractArticleContent($article)
|
||||
{
|
||||
$content = $article->find('div.article-content', 0)->innertext;
|
||||
$content = $article->find('div.ContentModule-Wrapper', 0)->innertext;
|
||||
|
||||
foreach (
|
||||
[
|
||||
|
@ -43,9 +43,8 @@ class DauphineLibereBridge extends FeedExpander
|
||||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
@ -1,40 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge for davestrailerpage.co.uk: every table row that is not a date
 * heading becomes one feed item.
 */
class DavesTrailerPageBridge extends BridgeAbstract
{
    const MAINTAINER = 'johnnygroovy';
    const NAME = 'Daves Trailer Page Bridge';
    const URI = 'https://www.davestrailerpage.co.uk/';
    const DESCRIPTION = 'Last trailers in HD thanks to Dave.';

    /**
     * Walk all table rows of the page; centred rows carry the date that
     * applies to the rows following them.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM(static::URI);
        if (!$html) {
            returnClientError('No results for this query.');
        }

        $activeDate = null;

        foreach ($html->find('tr') as $row) {
            if ($row->align == 'center') {
                // Date heading: remember it for the rows that follow
                $activeDate = $row->plaintext;
                continue;
            }

            $this->items[] = [
                // Title: first bold text of the first cell
                'title' => $row->find('td', 0)->find('b', 0)->plaintext,
                // Content: the second list of the row
                'content' => $row->find('ul', 1),
                // Link: the fourth anchor of the row
                'uri' => $row->find('a', 3)->getAttribute('href'),
                // Date string, left unparsed here
                'timestamp' => $activeDate,
            ];
        }
    }
}
|
File diff suppressed because it is too large
Load Diff
@ -4,8 +4,9 @@ class DemoBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'teromene';
|
||||
const NAME = 'DemoBridge';
|
||||
const URI = 'http://github.com/rss-bridge/rss-bridge';
|
||||
const URI = 'https://github.com/rss-bridge/rss-bridge';
|
||||
const DESCRIPTION = 'Bridge used for demos';
|
||||
const CACHE_TIMEOUT = 15;
|
||||
|
||||
const PARAMETERS = [
|
||||
'testCheckbox' => [
|
||||
|
63
bridges/DemosBerlinBridge.php
Normal file
63
bridges/DemosBerlinBridge.php
Normal file
@ -0,0 +1,63 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge listing the assemblies and marches registered with the Berlin
 * police, read from the JSON index published on berlin.de.
 */
class DemosBerlinBridge extends BridgeAbstract
{
    const NAME = 'Demos Berlin';
    const URI = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/';
    const CACHE_TIMEOUT = 3 * 60 * 60;
    const DESCRIPTION = 'Angezeigte Versammlungen und Aufzüge in Berlin';
    const MAINTAINER = 'knrdl';
    const PARAMETERS = [[
        'days' => [
            'name' => 'Tage',
            'type' => 'number',
            'title' => 'Einträge für die nächsten Tage zurückgeben',
            'required' => true,
            'defaultValue' => 7,
        ]
    ]];

    /**
     * @return string URL of the icon shown for this bridge
     */
    public function getIcon()
    {
        return 'https://www.berlin.de/i9f/r1/images/favicon/favicon.ico';
    }

    /**
     * Download the JSON index and add one item for every entry whose date
     * is not later than "now + days".
     *
     * @throws \Exception When the response is not a JSON document with an "index" list
     */
    public function collectData()
    {
        $url = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json';
        $json = getContents($url);
        $jsonFile = json_decode($json, true);

        // Fail with a clear message instead of iterating over null
        if (!is_array($jsonFile) || !isset($jsonFile['index']) || !is_array($jsonFile['index'])) {
            throw new \Exception('Unexpected response format from ' . $url);
        }

        $daysInterval = DateInterval::createFromDateString($this->getInput('days') . ' day');
        $maxTargetDate = date_add(new DateTime('now'), $daysInterval);

        // The entry fields are plain text from a remote source and end up in
        // HTML, so they are escaped before being interpolated.
        $esc = static function ($value) {
            return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        };

        foreach ($jsonFile['index'] as $entry) {
            $entryDay = implode('-', array_reverse(explode('.', $entry['datum']))); // dd.mm.yyyy to yyyy-mm-dd
            $entryTime = strtotime($entryDay);
            if ($entryTime === false) {
                // Unparseable date: the entry cannot be compared against the window
                continue;
            }

            $ts = (new DateTime())->setTimestamp($entryTime);
            if ($ts > $maxTargetDate) {
                continue;
            }

            $item = [];
            $item['uri'] = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/detail/' . $entry['id'];
            $item['timestamp'] = $entryDay . ' ' . $entry['von'];
            $item['title'] = $entry['thema'];
            $location = $entry['strasse_nr'] . ' ' . $entry['plz'];
            // http_build_query() URL-encodes the value, so it is safe inside the href
            $locationQuery = http_build_query(['query' => $location]);

            $htmlTopic = $esc($entry['thema']);
            $htmlTimestamp = $esc($item['timestamp']);
            $htmlDate = $esc($entry['datum']);
            $htmlFrom = $esc($entry['von']);
            $htmlUntil = $esc($entry['bis']);
            $htmlLocation = $esc($location);
            $htmlRoute = $esc($entry['aufzugsstrecke']);

            $item['content'] = <<<HTML
<h1>{$htmlTopic}</h1>
<p>📅 <time datetime="{$htmlTimestamp}">{$htmlDate} {$htmlFrom} - {$htmlUntil}</time></p>
<a href="https://www.openstreetmap.org/search?$locationQuery">
📍 {$htmlLocation}
</a>
<p>{$htmlRoute}</p>
HTML;
            $item['uid'] = $this->getSanitizedHash($entry['datum'] . '-' . $entry['von'] . '-' . $entry['bis'] . '-' . $entry['thema']);

            $this->items[] = $item;
        }
    }

    /**
     * Hash a string after lower-casing it and removing everything except
     * ASCII letters and digits.
     *
     * @param string $string Text to derive the identifier from
     * @return string SHA-1 hex digest of the normalised text
     */
    private function getSanitizedHash($string)
    {
        return hash('sha1', preg_replace('/[^a-zA-Z0-9]/', '', strtolower($string)));
    }
}
|
@ -78,13 +78,9 @@ class DerpibooruBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$queryJson = json_decode(getContents(
|
||||
self::URI
|
||||
. 'api/v1/json/search/images?filter_id='
|
||||
. urlencode($this->getInput('f'))
|
||||
. '&q='
|
||||
. urlencode($this->getInput('q'))
|
||||
));
|
||||
$url = self::URI . 'api/v1/json/search/images?filter_id=' . urlencode($this->getInput('f')) . '&q=' . urlencode($this->getInput('q'));
|
||||
|
||||
$queryJson = json_decode(getContents($url));
|
||||
|
||||
foreach ($queryJson->images as $post) {
|
||||
$item = [];
|
||||
|
148
bridges/DeutscheWelleBridge.php
Normal file
148
bridges/DeutscheWelleBridge.php
Normal file
@ -0,0 +1,148 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge that expands the Deutsche Welle Atom feeds with the markup of the
 * linked article pages.
 */
class DeutscheWelleBridge extends FeedExpander
{
    const MAINTAINER = 'No maintainer';
    const NAME = 'Deutsche Welle Bridge';
    const URI = 'https://www.dw.com';
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const CACHE_TIMEOUT = 3600;
    const PARAMETERS = [[
        'feed' => [
            'name' => 'feed',
            'type' => 'list',
            'values' => [
                'All Top Stories and News Updates'
                    => 'http://rss.dw.com/atom/rss-en-all',
                'Top Stories'
                    => 'http://rss.dw.com/atom/rss-en-top',
                'Germany'
                    => 'http://rss.dw.com/atom/rss-en-ger',
                'World'
                    => 'http://rss.dw.com/atom/rss-en-world',
                'Europe'
                    => 'http://rss.dw.com/atom/rss-en-eu',
                'Business'
                    => 'http://rss.dw.com/atom/rss-en-bus',
                'Science'
                    => 'http://rss.dw.com/atom/rss_en_science',
                'Environment'
                    => 'http://rss.dw.com/atom/rss_en_environment',
                'Culture & Lifestyle'
                    => 'http://rss.dw.com/atom/rss-en-cul',
                'Sports'
                    => 'http://rss.dw.de/atom/rss-en-sports',
                'Visit Germany'
                    => 'http://rss.dw.com/atom/rss-en-visitgermany',
                'Asia'
                    => 'http://rss.dw.com/atom/rss-en-asia',
                'Deutsche Welle Gesamt'
                    => 'http://rss.dw.com/atom/rss-de-all',
                'Themen des Tages'
                    => 'http://rss.dw.com/atom/rss-de-top',
                'Nachrichten'
                    => 'http://rss.dw.com/atom/rss-de-news',
                'Wissenschaft'
                    => 'http://rss.dw.com/atom/rss-de-wissenschaft',
                'Sport'
                    => 'http://rss.dw.com/atom/rss-de-sport',
                'Deutschland entdecken'
                    => 'http://rss.dw.com/atom/rss-de-deutschlandentdecken',
                'Presse'
                    => 'http://rss.dw.com/atom/presse',
                'Politik'
                    => 'http://rss.dw.com/atom/rss_de_politik',
                'Wirtschaft'
                    => 'http://rss.dw.com/atom/rss-de-eco',
                'Kultur & Leben'
                    => 'http://rss.dw.com/atom/rss-de-cul',
                'Kultur & Leben: Buch'
                    => 'http://rss.dw.com/atom/rss-de-cul-buch',
                'Kultur & Leben: Film'
                    => 'http://rss.dw.com/atom/rss-de-cul-film',
                'Kultur & Leben: Musik'
                    => 'http://rss.dw.com/atom/rss-de-cul-musik',
            ]
        ]
    ]];

    /**
     * Expand the feed selected through the "feed" parameter.
     */
    public function collectData()
    {
        $this->collectExpandableDatas($this->getInput('feed'));
    }

    /**
     * Replace the content of a feed item with the markup of its article page.
     *
     * The query string is stripped from the item URI before the page is
     * fetched. When the page has no <article> element the item is returned
     * with only its URI cleaned.
     *
     * @param array $item Feed item as delivered by the feed parser
     * @return array The item with cleaned URI and, where available, author and full content
     */
    protected function parseItem(array $item)
    {
        $parsedUri = parse_url($item['uri']);
        unset($parsedUri['query']);
        $item['uri'] = $this->unparseUrl($parsedUri);

        $page = getSimpleHTMLDOM($item['uri']);
        $page = defaultLinkTo($page, $item['uri']);

        $article = $page->find('article', 0);
        if (!$article) {
            // Nothing to expand; keep what the feed itself provided
            return $item;
        }

        // author
        $author = $article->find('.author-link > span', 0);
        if ($author) {
            $item['author'] = $author->text();
        }

        // The teaser is captured first because it may sit inside one of the
        // elements that are removed below.
        $teaser = $article->find('.teaser-text', 0);
        $item['content'] = is_null($teaser) ? '' : $teaser->outertext();

        // remove unneeded elements
        $unwanted = 'header, .advertisement, [data-tracking-name="sharing-icons-inline"], a.external-link > svg, picture > source, .vjs-wrapper, .dw-widget, footer';
        foreach ($article->find($unwanted) as $bad) {
            $bad->remove();
        }
        // reload html as remove() is buggy
        $article = str_get_html($article->outertext());

        // remove width and height values from img tags
        foreach ($article->find('img') as $img) {
            $img->width = null;
            $img->height = null;
        }

        // remove bad img src's added by defaultLinkTo() above
        // these images should have src="" and will then use
        // the srcset attribute to load the best image for the displayed size
        foreach ($article->find('figure > picture > img') as $img) {
            $img->src = '';
        }

        // replace lazy-loaded images
        foreach ($article->find('figure.placeholder-image') as $figure) {
            $img = $figure->find('img', 0);
            if (!$img) {
                continue;
            }
            $img->src = str_replace('${formatId}', '906', $img->getAttribute('data-url'));
            $img->style = null;
        }

        $item['content'] .= $article->save();

        return $item;
    }

    /**
     * Rebuild a URL string from the array returned by parse_url().
     *
     * @see https://www.php.net/manual/en/function.parse-url.php#106731
     *
     * @param array $parsed_url Components as produced by parse_url(); absent keys are omitted from the result
     * @return string The reassembled URL
     */
    private function unparseUrl($parsed_url)
    {
        $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
        $host = $parsed_url['host'] ?? '';
        $port = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
        $user = $parsed_url['user'] ?? '';
        // The password is separated from the user by ':' ("user:pass@host")
        $pass = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
        $pass = ($user || $pass) ? "$pass@" : '';
        $path = $parsed_url['path'] ?? '';
        $query = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
        $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';

        return "$scheme$user$pass$host$port$path$query$fragment";
    }
}
|
28
bridges/DeutscherAeroClubBridge.php
Normal file
28
bridges/DeutscherAeroClubBridge.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
/**
 * XPath-configured bridge for the news list of the Deutscher Aero Club.
 */
class DeutscherAeroClubBridge extends XPathAbstract
{
    const NAME = 'Deutscher Aero Club';
    const URI = 'https://www.daec.de/news/';
    const DESCRIPTION = 'News aus Luftsport und Dachverband';
    const MAINTAINER = 'hleskien';

    const FEED_SOURCE_URL = 'https://www.daec.de/news/';
    const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"][1]/@href';
    const XPATH_EXPRESSION_ITEM = '//div[contains(@class, "news-list-view")]/div[contains(@class, "article")]';
    const XPATH_EXPRESSION_ITEM_TITLE = './/span[@itemprop="headline"]';
    const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@itemprop="description"]/p';
    const XPATH_EXPRESSION_ITEM_URI = './/div[@class="news-header"]//a/@href';
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time/@datetime';
    const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img/@src';
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Convert the value of the `datetime` attribute into a Unix timestamp.
     *
     * A plain `Y-m-d` date is mapped to midnight of that day. Any other
     * value is handed to strtotime() instead of failing on the `false`
     * that createFromFormat() returns for it.
     *
     * @param string $value Raw attribute value selected by XPATH_EXPRESSION_ITEM_TIMESTAMP
     * @return int|false Unix timestamp, or false when the value cannot be parsed at all
     */
    protected function formatItemTimestamp($value)
    {
        $dti = DateTimeImmutable::createFromFormat('Y-m-d', $value);
        if ($dti === false) {
            return strtotime($value);
        }

        // createFromFormat() fills the unparsed time fields with the
        // current time, so they are reset to midnight explicitly.
        return $dti->setTime(0, 0, 0)->getTimestamp();
    }
}
|
||||
|
@ -163,19 +163,6 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
]
|
||||
];
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
|
||||
{
|
||||
$domain = $this->getInput('domain');
|
||||
if (!empty($domain)) {
|
||||
return 'https://' . $domain . self::DOMAIN . self::RSS_URL;
|
||||
}
|
||||
|
||||
return self::URI . self::RSS_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Grabs the RSS item from Developpez.com
|
||||
*/
|
||||
@ -189,15 +176,12 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
* Parse the content of every RSS item. And will try to get the full article
|
||||
* pointed by the item URL intead of the default abstract.
|
||||
*/
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
if (count($this->items) >= $this->getInput('limit')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// This function parse each entry in the RSS with the default parse
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
// There is a bug in Developpez RSS, coma are writtent as '~?' in the
|
||||
// title, so I have to fix it manually
|
||||
$item['title'] = $this->fixComaInTitle($item['title']);
|
||||
@ -229,6 +213,19 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
return $item;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
|
||||
{
|
||||
$domain = $this->getInput('domain');
|
||||
if (!empty($domain)) {
|
||||
return 'https://' . $domain . self::DOMAIN . self::RSS_URL;
|
||||
}
|
||||
|
||||
return self::URI . self::RSS_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace '~?' by a proper coma ','
|
||||
*/
|
||||
@ -334,6 +331,9 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
*/
|
||||
private function isHtmlTagNotTxt($txt)
|
||||
{
|
||||
if ($txt === '') {
|
||||
return false;
|
||||
}
|
||||
$html = str_get_html($txt);
|
||||
return $html && $html->root && count($html->root->children) > 0;
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ class DiarioDoAlentejoBridge extends BridgeAbstract
|
||||
}, self::PT_MONTH_NAMES),
|
||||
array_map(function ($num) {
|
||||
return sprintf('-%02d-', $num);
|
||||
}, range(1, sizeof(self::PT_MONTH_NAMES))),
|
||||
}, range(1, count(self::PT_MONTH_NAMES))),
|
||||
$element->find('span.date', 0)->innertext
|
||||
);
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user