Compare commits

...

11 commits
1.01 ... main

17 changed files with 353 additions and 197 deletions

View file

@ -1,6 +1,6 @@
# Fedi Link Fetcher # Fedi Link Fetcher
A pair of PHP scripts that extract links from your Mastodon / snac bookmarks and add them to: A pair of PHP scripts that extract links from your Mastodon / gotosocial / snac bookmarks and add them to:
- Read Later in Readeck (simple API token) - Read Later in Readeck (simple API token)
- Watch Later in YouTube Playlist (Google oAUTH API) - Watch Later in YouTube Playlist (Google oAUTH API)
@ -12,12 +12,34 @@ In Web UI:
- Settings, API tokens, Create API Token - Settings, API tokens, Create API Token
- Check Bookmarks Read + Write - Check Bookmarks Read + Write
Save it as: Save it in:
``` ```
/_credentials/token.json /_credentials/readeck_account.txt
```
Just add one line like:
```
readeck.instance.com|YOUR_TOKEN
``` ```
## 2. Getting a Youtube API token
## 2. Getting mastodon / snac / gotosocial tokens
You can either use the respective web UI or the [Token Generator here](https://takahashim.github.io/mastodon-access-token/): set the instance URL, log in, and copy the token it returns.
Save one account per line in:
```
/_credentials/fedi_accounts.txt
```
Add one line per user, like this (the same instance can appear several times, since the token determines the user):
```
mastodon.social|YOUR_TOKEN
mastodon.social|YOUR_TOKEN
my.instance.org|YOUR_TOKEN
```
The script loops over every account but always saves to the same Readeck / YouTube account.
## 3. Getting a Youtube API token (it's a tad more complicated...)
- Go to the Google Cloud Console: - Go to the Google Cloud Console:
- Project > APIs & Services > Credentials - Project > APIs & Services > Credentials
@ -28,6 +50,7 @@ Save it as:
Save it as: Save it as:
``` ```
/_credentials/client_secret.json /_credentials/client_secret.json
/_credentials/token.json
``` ```
## 3. Add your Google account as a test user ## 3. Add your Google account as a test user

View file

@ -1 +0,0 @@
https://www.openculture.com/2025/05/how-frank-lloyd-wrights-architecture-evolved-over-70-years-and-changed-america.html

View file

@ -1 +0,0 @@
https://en.wikipedia.org/wiki/Rainhill_trials

View file

@ -1 +0,0 @@
https://fossforce.com/2025/04/is-free-or-open-source-software-sustainable/

View file

@ -1 +0,0 @@
https://manualdousuario.net/en/writing-chatgpt-ai/

View file

@ -1 +0,0 @@
https://goblackcat.com/feeling-exhausted/

View file

@ -1 +0,0 @@
https://fenati.org.br/brasil-prepara-marco-regulatorio-para-data-centers-com-beneficios-fiscais-e-regras-sustentaveis/#datacenter

View file

View file

@ -0,0 +1,2 @@
{"installed":{"client_id":"XYZ.apps.googleusercontent.com","project_id":"yourproject_with_youtube_access","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"XYZXYZ"}}

View file

@ -0,0 +1,2 @@
your.instance.org|XXXYYY
another.instance.com.br|XXXYYY

View file

@ -0,0 +1 @@
XXXXXXXYYYYYYYYY

View file

@ -0,0 +1 @@
your.readeck.fr|XXXXXYYYY

View file

@ -0,0 +1 @@
{"access_token":"XXXYYY","expires_in":3599,"refresh_token":"1\/\/AAABBBCCC","scope":"https:\/\/www.googleapis.com\/auth\/youtube","token_type":"Bearer","expires_at":1755745256}

View file

@ -1,27 +1,83 @@
<?php <?php
// === Example use === // === Example use ===
#addVideoToFediList('https://www.youtube.com/watch?v=dQw4w9WgXcQ'); #addVideoToFediList('https://www.youtube.com/watch?v=dQw4w9WgXcQ');
require_once('utils.php');
function isYouTubeLink($url) { function isYouTubeLink($url) {
return preg_match('#^(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[a-zA-Z0-9_-]{11}#', $url); return preg_match('#^(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[a-zA-Z0-9_-]{11}#', $url);
} }
function addVideoToFediList($videoUrl) { function addVideoToFediList($videoUrl) {
$playlistId = @file_get_contents(__DIR__ .'/_credentials/fedilist_id.txt'); $credentialsDir = __DIR__ . '/_credentials';
$tokenPath = "$credentialsDir/token.json";
$playlistId = @file_get_contents("$credentialsDir/fedilist_id.txt");
if (!$playlistId) { if (!$playlistId) {
echo "‼️ Error: FediList ID not found. Make sure fedilist_id.txt exists.\n"; echo "‼️ Error: FediList ID not found. Make sure fedilist_id.txt exists.\n";
return false; return false;
} }
$token = @json_decode(file_get_contents(__DIR__ .'/_credentials/token.json'), true); // === Load client credentials ===
$secrets = json_decode(file_get_contents("$credentialsDir/client_secret.json"), true);
$client = $secrets['installed'] ?? $secrets['web'] ?? null;
if (!$client || !isset($client['client_id'], $client['client_secret'])) {
die("Error: Invalid client_secret.json format.\n");
}
$clientId = $client['client_id'];
$clientSecret = $client['client_secret'];
// === Load token ===
$token = @json_decode(file_get_contents($tokenPath), true);
if (!$token || !isset($token['access_token'])) { if (!$token || !isset($token['access_token'])) {
echo "‼️ Error: token.json missing or invalid. Authenticate first.\n"; echo "‼️ Error: token.json missing or invalid. Authenticate first.\n";
return false; return false;
} }
// Extract video ID from URL //print_r($token);
// === Refresh token if expired ===
// Don't let it expire, refresh if we have less than 15 minutes left
// Google Tokens usually last 60 min, so more ore less around 45min we get a new one
$refreshSecondMargin = 15*60;
if (isset($token['expires_at'])) {
$secondsLeft = $token['expires_at'] - time();
if ($secondsLeft > $refreshSecondMargin) {
$minutes = floor($secondsLeft / 60);
$seconds = $secondsLeft % 60;
echo "⏳ Token expires in $minutes minutes and $seconds seconds.\n";
} else {
echo "🔄 Access token expired or will expire in less than $refreshSecondMargin seconds. (Seconds Left: $secondsLeft). Refreshing...\n";
$refreshResponse = curlPost('https://oauth2.googleapis.com/token', [
'client_id' => $clientId,
'client_secret' => $clientSecret,
'refresh_token' => $token['refresh_token'],
'grant_type' => 'refresh_token'
]);
if (isset($refreshResponse['access_token'])) {
$token['access_token'] = $refreshResponse['access_token'];
$token['expires_in'] = $refreshResponse['expires_in'];
$token['expires_at'] = time() + $refreshResponse['expires_in'];
file_put_contents($tokenPath, json_encode($token));
echo "✅ Token refreshed.\n";
} else {
echo "‼️ Failed to refresh token: " . ($refreshResponse['error'] ?? 'unknown') . "\n";
return false;
}
}
}
// === Extract video ID ===
if (!preg_match('/(?:v=|\/)([a-zA-Z0-9_-]{11})/', $videoUrl, $matches)) { if (!preg_match('/(?:v=|\/)([a-zA-Z0-9_-]{11})/', $videoUrl, $matches)) {
echo "⁉️ Invalid YouTube URL: $videoUrl\n"; echo "⁉️ Invalid YouTube URL: $videoUrl\n";
return false; return false;

View file

@ -1,178 +1,243 @@
#!/usr/bin/php #!/usr/bin/php
<?php <?php
require("add_to_fedilist.php");
//----------------------------- //-----------------------------
// CREDENTIALS // CREDENTIALS
//----------------------------- //-----------------------------
$MASTODON_TOKEN = '8beea62e32b336e5d934d06a21b0b996';
$MASTODON_HOST = 'go.lema.org';
$READECK_TOKEN = 'LDJb4YbGKe6Fp8cSygpuw5LjmwkgGTAbFbP77TQtYwe1hFZ4';
$READECK_HOST = 'read.lema.org';
$MINIMUM_TEXT_SIZE = 500; // article with less characters of content will be ignored $MINIMUM_TEXT_SIZE = 500; // article with less characters of content will be ignored
$fediAccounts = loadAccounts(__DIR__ . '/_credentials/fedi_accounts.txt');
$readeckAccount = loadAccounts(__DIR__ . '/_credentials/readeck_account.txt');
// _credentials/readeck_account.txt
// should have only one line with host|token
// ex: gone.lema.org|XXXXYYYXXXYYY
$acc = $readeckAccount[0];
$READECK_HOST = $acc['host'];
$READECK_TOKEN = $acc['token'];
echo "Readeck Host: $READECK_HOST \n";
echo "Fedi Accounts to loop: ".count($fediAccounts)."\n";
//----------------------------- // _credentials/fedi_accountst.txt
// FETCH MASTODON BOOKMARKS // each line like with host|token
//----------------------------- // ex: gotosocial.lema.org|XXXXYYYXXXYYY
echo "# Fetching mastodon / snac bookmarks...\n"; foreach ($fediAccounts as $acc) {
date_default_timezone_set('America/Sao_Paulo'); $MASTODON_HOST = $acc['host'];
echo date('Y-m-d H:i:s')."\n"; $MASTODON_TOKEN = $acc['token'];
$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks"); echo "";
curl_setopt_array($ch, [ echo "";
CURLOPT_RETURNTRANSFER => true, echo "--------------------------------\n";
CURLOPT_HTTPHEADER => [ echo "Host: $MASTODON_HOST\n";
"Authorization: Bearer $MASTODON_TOKEN", echo "Token: $MASTODON_TOKEN\n";
"Accept: application/json" echo "--------------------------------\n";
] echo "";
]);
$bookmarksJson = curl_exec($ch);
$bookmarks = json_decode($bookmarksJson, true);
if (!is_array($bookmarks)) {
die("❌ Failed to parse Mastodon bookmarks.\n");
}
echo "Found bookmarks:".count($bookmarks)."\n";
//-----------------------------
// FIND VALID URLs in posts
//-----------------------------
foreach ($bookmarks as $status) {
if (!isset($status['content'])) {
continue;
}
$content = strip_tags($status['content']);
preg_match_all('/https?:\/\/[^\s"<]+/', $content, $matches);
if (!empty($matches[0])) {
$oneLink = $matches[0][0];
if (filter_var($oneLink, FILTER_VALIDATE_URL)) {
$links[] = $oneLink;
} else {
// This happens for example if URL has an emoji at the end
echo "INVALID URL: $oneLink\n";
}
}
}
echo "Valid URLS:".count($links)."\n";
print_r($links);
//-----------------------------
// SEND LINKS TO READECK
//-----------------------------
$apiUrl = "https://$READECK_HOST/api/bookmarks";
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0');
$headers = [
"Authorization: Bearer $READECK_TOKEN",
'Accept: application/json',
'Content-Type: application/json'
];
$alreadySentDir = __DIR__ . "/_already_sent";
if (!is_dir($alreadySentDir)) {
mkdir($alreadySentDir, 0755, true); // recursive mkdir
}
require("add_to_fedilist.php");
foreach ($links as $link) {
if (isYouTubeLink($link)) {
addVideoToFediList($link);
continue;
}
// READECK will accept several times the same URL !
// Make sure we don't send it several times by keeping an archive here
$hash = md5($link);
$filePath = __DIR__ . "/_already_sent/{$hash}.txt";
if (file_exists($filePath)) { //-----------------------------
echo " Already sent: $link\n"; // FETCH MASTODON BOOKMARKS
continue; //-----------------------------
} echo "# Fetching mastodon / gotosocial / snac bookmarks...\n";
date_default_timezone_set('America/Sao_Paulo');
echo date('Y-m-d H:i:s')."\n";
$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks");
$options = [ #GotoSocial will reply with error "I am a teapot" if no user agent is sent...
'http' => [ curl_setopt_array($ch, [
'method' => 'GET', CURLOPT_RETURNTRANSFER => true,
'header' => "User-Agent: Mozilla/5.0\r\n" CURLOPT_USERAGENT => "FediSlurperScript/1.0 (https://code.lema.org/santiago/fedi_slurp)",
]
];
// First check if page has content CURLOPT_HTTPHEADER => [
//$ch = curl_init($link);; "Authorization: Bearer $MASTODON_TOKEN",
curl_setopt($ch, CURLOPT_URL, $link); "Accept: application/json"
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); ]
$content = curl_exec($ch);
if ($content === false) {
echo "❌ Failed to fetch $link\n";
continue;
}
$plainText = strip_tags($content);
if (strlen($plainText) < $MINIMUM_TEXT_SIZE) {
echo "⚠️ Skipping $link\ncontent too small (".strlen($plainText)." chars < $MINIMUM_TEXT_SIZE )\n";
continue;
}
echo "🟢 Will add to Readeck $link\nLength: " . strlen($plainText)."\n";
//not passing title here, since we don't have it
$payload = json_encode([
"labels" => ["automasto"],
"url" => $link
]); ]);
curl_setopt($ch, CURLOPT_URL, $apiUrl); $bookmarksJson = curl_exec($ch);
curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); $bookmarks = json_decode($bookmarksJson, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); if (!is_array($bookmarks)) {
die("❌ Failed to parse Mastodon bookmarks.\n");
}
$response = curl_exec($ch); echo "Found bookmarks:".count($bookmarks)."\n";
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
#print_r($bookmarks);
if (curl_errno($ch)) { //-----------------------------
echo "❌ Error adding $link: " . curl_error($ch) . "\n"; // FIND VALID URLs in posts
} else { //-----------------------------
// Store already sent file only if connection worked foreach ($bookmarks as $status) {
file_put_contents($filePath, $link); if (!isset($status['content'])) {
continue;
}
$content = strip_tags($status['content']);
preg_match_all('/https?:\/\/[^\s"<]+/', $content, $matches);
if (!empty($matches[0])) {
$json = json_decode($response, true); $oneLink = $matches[0][0];
if (json_last_error() === JSON_ERROR_NONE) { if (filter_var($oneLink, FILTER_VALIDATE_URL)) {
if ($httpCode >= 200 && $httpCode < 300) { $links[] = $oneLink;
echo "✅ [$httpCode] Successfully added: $link\n";
} else { } else {
echo "⚠️ Server returned status $httpCode for $link\n"; // This happens for example if URL has an emoji at the end
echo "INVALID URL: $oneLink\n";
} }
} else {
echo "⚠️ Response is not valid JSON for $link: $response\n";
} }
} }
} if (isset($links)) {
echo "Valid URLS:".count($links)."\n";
print_r($links);
} else {
echo "NO links founds. Kthxbye \n";
die(0);
curl_close($ch); }
//-----------------------------
// SEND LINKS TO READECK
//-----------------------------
$apiUrl = "https://$READECK_HOST/api/bookmarks";
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0');
$headers = [
"Authorization: Bearer $READECK_TOKEN",
'Accept: application/json',
'Content-Type: application/json'
];
$alreadySentDir = __DIR__ . "/_already_sent";
if (!is_dir($alreadySentDir)) {
mkdir($alreadySentDir, 0755, true); // recursive mkdir
}
foreach ($links as $link) {
if (isYouTubeLink($link)) {
addVideoToFediList($link);
continue;
}
// READECK will accept several times the same URL !
// Make sure we don't send it several times by keeping an archive here
$hash = md5($link);
$filePath = __DIR__ . "/_already_sent/{$hash}.txt";
if (file_exists($filePath)) {
echo " Already sent: $link\n";
continue;
}
$options = [
'http' => [
'method' => 'GET',
'header' => "User-Agent: Mozilla/5.0\r\n"
]
];
// First check if page has content
//$ch = curl_init($link);;
curl_setopt($ch, CURLOPT_URL, $link);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$content = curl_exec($ch);
if ($content === false) {
echo "❌ Failed to fetch $link\n";
continue;
}
$plainText = strip_tags($content);
if (strlen($plainText) < $MINIMUM_TEXT_SIZE) {
echo "⚠️ Skipping $link\ncontent too small (".strlen($plainText)." chars < $MINIMUM_TEXT_SIZE )\n";
continue;
}
echo "🟢 Will add to Readeck $link\nLength: " . strlen($plainText)."\n";
//not passing title here, since we don't have it
$payload = json_encode([
"labels" => ["automasto"],
"url" => $link
]);
curl_setopt($ch, CURLOPT_URL, $apiUrl);
curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if (curl_errno($ch)) {
echo "❌ Error adding $link: " . curl_error($ch) . "\n";
} else {
// Store already sent file only if connection worked
file_put_contents($filePath, $link);
$json = json_decode($response, true);
if (json_last_error() === JSON_ERROR_NONE) {
if ($httpCode >= 200 && $httpCode < 300) {
echo "✅ [$httpCode] Successfully added: $link\n";
} else {
echo "⚠️ Server returned status $httpCode for $link\n";
}
} else {
echo "⚠️ Response is not valid JSON for $link: $response\n";
}
}
}
curl_close($ch);
} // end accounts loop
/**
 * Load "host|token" account entries from a credentials file.
 *
 * Every non-empty line is expected to look like `host|token`. The line is
 * split on the first `|` only, so any further `|` characters remain part of
 * the token. Malformed lines (no `|`, empty host, or empty token) are skipped
 * so callers never receive a half-filled entry.
 *
 * @param string $filepath Path to the credentials file.
 * @return array List of ['host' => string, 'token' => string] entries, in file
 *               order; empty when the file is missing or cannot be read.
 */
function loadAccounts(string $filepath): array
{
    $accounts = [];

    // A missing path, a directory, or an unreadable file all mean "no accounts".
    if (!is_file($filepath) || !is_readable($filepath)) {
        return $accounts;
    }

    $lines = file($filepath, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        return $accounts;
    }

    foreach ($lines as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }

        // Limit of 2: only the first '|' separates host from token.
        $parts = explode('|', $line, 2);
        if (count($parts) !== 2) {
            continue; // no separator: not a host|token line
        }

        // Trim both sides so "host | token" does not leak spaces into URLs/headers.
        $host  = trim($parts[0]);
        $token = trim($parts[1]);
        if ($host === '' || $token === '') {
            continue;
        }

        $accounts[] = [
            'host'  => $host,
            'token' => $token,
        ];
    }

    return $accounts;
}

View file

@ -1,6 +1,8 @@
#!/usr/bin/php #!/usr/bin/php
<?php <?php
require_once('utils.php');
// === Load client credentials === // === Load client credentials ===
$secrets = json_decode(file_get_contents(__DIR__ . '/_credentials/client_secret.json'), true); $secrets = json_decode(file_get_contents(__DIR__ . '/_credentials/client_secret.json'), true);
$client = $secrets['installed'] ?? $secrets['web'] ?? null; $client = $secrets['installed'] ?? $secrets['web'] ?? null;
@ -12,32 +14,15 @@ if (!$client || !isset($client['client_id'], $client['client_secret'])) {
$clientId = $client['client_id']; $clientId = $client['client_id'];
$clientSecret = $client['client_secret']; $clientSecret = $client['client_secret'];
// === cURL helper function ===
/**
 * POST form-encoded data to a URL and return the JSON-decoded response.
 *
 * @param string $url     Endpoint passed to curl_init().
 * @param array  $data    Fields encoded with http_build_query() as the request body.
 * @param array  $headers Extra header lines appended after the two default headers.
 * @return mixed Result of json_decode($response, true).
 *
 * Terminates the script with die() when cURL reports an error.
 */
function curlPost($url, $data, $headers = []) {
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_POST, true);
// Have curl_exec() return the response body instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Default headers come first; caller-supplied header lines are appended.
curl_setopt($ch, CURLOPT_HTTPHEADER, array_merge([
'Content-Type: application/x-www-form-urlencoded',
'User-Agent: curl/7.64.1'
], $headers));
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
$response = curl_exec($ch);
// Any cURL error aborts the whole script rather than returning to the caller.
if (curl_errno($ch)) {
die("cURL error: " . curl_error($ch) . "\n");
}
curl_close($ch);
return json_decode($response, true);
}
// === Create playlist helper === // === Create playlist helper ===
function createPlaylist($accessToken) { function createPlaylist($accessToken)
{
$path = __DIR__ . '/_credentials/fedilist_id.txt'; $path = __DIR__ . '/_credentials/fedilist_id.txt';
if (file_exists($path)) { if (file_exists($path)) {
echo "FediList Playlist ID already exists at :$path\n"; echo "✅ FediList Playlist ID already exists at :$path\n";
return; return;
} }
$data = [ $data = [
@ -69,12 +54,12 @@ function createPlaylist($accessToken) {
$result = json_decode($response, true); $result = json_decode($response, true);
if (isset($result['id'])) { if (isset($result['id'])) {
echo "FediList created!\nPlaylist ID: " . $result['id'] . "\n"; echo "FediList created!\nPlaylist ID: " . $result['id'] . "\n";
file_put_contents($path, $result['id']); file_put_contents($path, $result['id']);
} else { } else {
echo "Failed to create playlist:\n$response\n"; echo "Failed to create playlist:\n$response\n";
} }
} }
@ -88,11 +73,15 @@ if (!isset($deviceData['user_code'])) {
die("Failed to get device code.\n"); die("Failed to get device code.\n");
} }
echo "\n";
echo "==============================\n";
echo "==== DEVICE AUTHORIZATION ====\n"; echo "==== DEVICE AUTHORIZATION ====\n";
echo "==============================\n";
echo "Visit: " . $deviceData['verification_url'] . "\n"; echo "Visit: " . $deviceData['verification_url'] . "\n";
echo "Enter code: " . $deviceData['user_code'] . "\n\n"; echo "\n";
echo "Enter code: " . $deviceData['user_code'] . "\n\nFinish login process and come back here.";
echo "Waiting...\n"; echo "\n\nWaiting...";
// === Step 2: Poll for token === // === Step 2: Poll for token ===
@ -100,6 +89,7 @@ $token = null;
$startTime = time(); $startTime = time();
while (true) { while (true) {
sleep($deviceData['interval']); sleep($deviceData['interval']);
echo ".";
$tokenResponse = curlPost('https://oauth2.googleapis.com/token', [ $tokenResponse = curlPost('https://oauth2.googleapis.com/token', [
'client_id' => $clientId, 'client_id' => $clientId,
@ -109,22 +99,22 @@ while (true) {
]); ]);
if (isset($tokenResponse['access_token'])) { if (isset($tokenResponse['access_token'])) {
$token = $tokenResponse; $tokenResponse['expires_at'] = time() + $tokenResponse['expires_in'];
echo "Saving token.json\n"; $path = __DIR__ . '/_credentials/token.json';
file_put_contents(__DIR__ . '/_credentials/token.json', json_encode($token)); file_put_contents($path, json_encode($tokenResponse));
echo "\n✅ Token saved as $path.\n";
break; break;
} }
if (isset($tokenResponse['error']) && $tokenResponse['error'] !== 'authorization_pending') { if (isset($tokenResponse['error']) && $tokenResponse['error'] !== 'authorization_pending') {
die("Auth error: " . $tokenResponse['error'] . "\n"); die("\nAuth error: " . $tokenResponse['error'] . "\n");
} }
if (time() - $startTime > $deviceData['expires_in']) { if (time() - $startTime > $deviceData['expires_in']) {
die("Authorization timed out.\n"); die("\nAuthorization timed out.\n");
} }
} }
// === Step 3: Create FediList Playlist === // === Step 3: Create FediList Playlist ===
createPlaylist($token['access_token']); createPlaylist($tokenResponse['access_token']);
?>

21
utils.php Normal file
View file

@ -0,0 +1,21 @@
<?php
// === cURL helper function ===
/**
 * Send a form-encoded POST request and decode the JSON reply.
 *
 * The request always carries a form-urlencoded Content-Type and a fixed
 * User-Agent header; any header lines given by the caller are appended
 * after those two.
 *
 * @param string $url     Endpoint to post to.
 * @param array  $data    Key/value pairs serialized with http_build_query().
 * @param array  $headers Additional raw header lines.
 * @return mixed Whatever json_decode() yields for the response body
 *               (decoded as associative arrays).
 *
 * Stops the script via die() if cURL reports an error.
 */
function curlPost($url, $data, $headers = [])
{
    $baseHeaders = [
        'Content-Type: application/x-www-form-urlencoded',
        'User-Agent: curl/7.64.1'
    ];

    $handle = curl_init($url);
    curl_setopt_array($handle, [
        CURLOPT_POST           => true,
        CURLOPT_RETURNTRANSFER => true, // hand the body back instead of echoing it
        CURLOPT_HTTPHEADER     => array_merge($baseHeaders, $headers),
        CURLOPT_POSTFIELDS     => http_build_query($data),
    ]);

    $rawBody = curl_exec($handle);

    // A transport-level failure is fatal for the calling script.
    if (curl_errno($handle)) {
        die("cURL error: " . curl_error($handle) . "\n");
    }

    curl_close($handle);

    return json_decode($rawBody, true);
}
?>