diff --git a/README.md b/README.md index b92f7413..1182b395 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Fedi Link Fetcher -A pair of PHP scripts that extract links from your Mastodon / snac bookmarks and add them to: +A pair of PHP scripts that extract links from your Mastodon / gotosocial / snac bookmarks and add them to: - Read Later in Readeck (simple API token) - Watch Later in YouTube Playlist (Google oAUTH API) @@ -12,12 +12,34 @@ In Web UI: - Settings, API tokens, Create API Token - Check Bookmarks Read + Write -Save it as: +Save it in: ``` -/_credentials/token.json +/_credentials/readeck_account.txt +``` +Just add one line like: +``` +readeck.instance.com|YOUR_TOKEN ``` -## 2. Getting a Youtube API token + +## 2. Getting mastodon / snac / gotosocial tokens + +You can either use the respective web UI or use the [Token Generator here](https://takahashim.github.io/mastodon-access-token/): set the URL, log in, and get the token back. + +Save one account per line in: +``` +/_credentials/fedi_accounts.txt +``` +Add one line per user like this (the same instance may appear several times, since the token determines the user): +``` +mastodon.social|YOUR_TOKEN +mastodon.social|YOUR_TOKEN +my.instance.org|YOUR_TOKEN +``` + +The script loops over each account but always saves to the same readeck / youtube account. + +## 3. Getting a Youtube API token (it's a tad more complicated...) - Go to the Google Cloud Console: - Project > APIs & Services > Credentials @@ -28,6 +50,7 @@ Save it as: Save it as: ``` /_credentials/client_secret.json +/_credentials/token.json ``` ## 3. 
Add your Google account as a test user diff --git a/_already_sent/.gitkeep b/_already_sent/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/_already_sent/0cf7dedb7cfac4340056a612bc3c50b1.txt b/_already_sent/0cf7dedb7cfac4340056a612bc3c50b1.txt deleted file mode 100644 index a963820c..00000000 --- a/_already_sent/0cf7dedb7cfac4340056a612bc3c50b1.txt +++ /dev/null @@ -1 +0,0 @@ -https://www.openculture.com/2025/05/how-frank-lloyd-wrights-architecture-evolved-over-70-years-and-changed-america.html \ No newline at end of file diff --git a/_already_sent/12b2cc8c05f23758c6e812ef34044f53.txt b/_already_sent/12b2cc8c05f23758c6e812ef34044f53.txt deleted file mode 100644 index 83138f71..00000000 --- a/_already_sent/12b2cc8c05f23758c6e812ef34044f53.txt +++ /dev/null @@ -1 +0,0 @@ -https://en.wikipedia.org/wiki/Rainhill_trials \ No newline at end of file diff --git a/_already_sent/3f2721d52927f2d2c5f1146665b441dd.txt b/_already_sent/3f2721d52927f2d2c5f1146665b441dd.txt deleted file mode 100644 index e60c2b8a..00000000 --- a/_already_sent/3f2721d52927f2d2c5f1146665b441dd.txt +++ /dev/null @@ -1 +0,0 @@ -https://fossforce.com/2025/04/is-free-or-open-source-software-sustainable/ \ No newline at end of file diff --git a/_already_sent/8eef8d9550fb4952c4ef2ba2656b4038.txt b/_already_sent/8eef8d9550fb4952c4ef2ba2656b4038.txt deleted file mode 100644 index db0cfd4f..00000000 --- a/_already_sent/8eef8d9550fb4952c4ef2ba2656b4038.txt +++ /dev/null @@ -1 +0,0 @@ -https://manualdousuario.net/en/writing-chatgpt-ai/ \ No newline at end of file diff --git a/_already_sent/a3ffc5f64551046ad7132d159f1f40e7.txt b/_already_sent/a3ffc5f64551046ad7132d159f1f40e7.txt deleted file mode 100644 index 2e5d8665..00000000 --- a/_already_sent/a3ffc5f64551046ad7132d159f1f40e7.txt +++ /dev/null @@ -1 +0,0 @@ -https://goblackcat.com/feeling-exhausted/ \ No newline at end of file diff --git a/_already_sent/db41d26877002dfa9dba650122b8a298.txt 
b/_already_sent/db41d26877002dfa9dba650122b8a298.txt deleted file mode 100644 index 8b80b665..00000000 --- a/_already_sent/db41d26877002dfa9dba650122b8a298.txt +++ /dev/null @@ -1 +0,0 @@ -https://fenati.org.br/brasil-prepara-marco-regulatorio-para-data-centers-com-beneficios-fiscais-e-regras-sustentaveis/#datacenter \ No newline at end of file diff --git a/_credentials/.gitkeep b/_credentials/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/_credentials_example/.gitkeep b/_credentials_example/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/_credentials_example/client_secret.json b/_credentials_example/client_secret.json new file mode 100644 index 00000000..bfda3599 --- /dev/null +++ b/_credentials_example/client_secret.json @@ -0,0 +1,2 @@ +{"installed":{"client_id":"XYZ.apps.googleusercontent.com","project_id":"yourproject_with_youtube_access","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"XYZXYZ"}} + diff --git a/_credentials_example/fedi_accounts.txt b/_credentials_example/fedi_accounts.txt new file mode 100644 index 00000000..d6f6fa63 --- /dev/null +++ b/_credentials_example/fedi_accounts.txt @@ -0,0 +1,2 @@ +your.instance.org|XXXYYY +another.instance.com.br|XXXYYY diff --git a/_credentials_example/fedilist_id.txt b/_credentials_example/fedilist_id.txt new file mode 100644 index 00000000..4859b3e9 --- /dev/null +++ b/_credentials_example/fedilist_id.txt @@ -0,0 +1 @@ +XXXXXXXYYYYYYYYY diff --git a/_credentials_example/readeck_account.txt b/_credentials_example/readeck_account.txt new file mode 100644 index 00000000..d0c57fe2 --- /dev/null +++ b/_credentials_example/readeck_account.txt @@ -0,0 +1 @@ +your.readeck.fr|XXXXXYYYY diff --git a/_credentials_example/token.json b/_credentials_example/token.json new file mode 100644 index 00000000..c2c648ed --- /dev/null +++ 
b/_credentials_example/token.json @@ -0,0 +1 @@ +{"access_token":"XXXYYY","expires_in":3599,"refresh_token":"1\/\/AAABBBCCC","scope":"https:\/\/www.googleapis.com\/auth\/youtube","token_type":"Bearer","expires_at":1755745256} diff --git a/add_to_fedilist.php b/add_to_fedilist.php index 74a2b7d5..a9aff48a 100644 --- a/add_to_fedilist.php +++ b/add_to_fedilist.php @@ -1,31 +1,87 @@ $refreshSecondMargin) { + $minutes = floor($secondsLeft / 60); + $seconds = $secondsLeft % 60; + echo "⏳ Token expires in $minutes minutes and $seconds seconds.\n"; + } else { + echo "🔄 Access token expired or will expire in less than $refreshSecondMargin seconds. (Seconds Left: $secondsLeft). Refreshing...\n"; + + $refreshResponse = curlPost('https://oauth2.googleapis.com/token', [ + 'client_id' => $clientId, + 'client_secret' => $clientSecret, + 'refresh_token' => $token['refresh_token'], + 'grant_type' => 'refresh_token' + ]); + + if (isset($refreshResponse['access_token'])) { + $token['access_token'] = $refreshResponse['access_token']; + $token['expires_in'] = $refreshResponse['expires_in']; + $token['expires_at'] = time() + $refreshResponse['expires_in']; + file_put_contents($tokenPath, json_encode($token)); + echo "✅ Token refreshed.\n"; + } else { + echo "‼️ Failed to refresh token: " . ($refreshResponse['error'] ?? 'unknown') . 
"\n"; + return false; + } + } +} + + + // === Extract video ID === if (!preg_match('/(?:v=|\/)([a-zA-Z0-9_-]{11})/', $videoUrl, $matches)) { echo "⁉️ Invalid YouTube URL: $videoUrl\n"; return false; - } + } $videoId = $matches[1]; // === Step 1: Check if video is already in playlist === diff --git a/fedi_slurp.php b/fedi_slurp.php index a473b6b8..0c33318d 100755 --- a/fedi_slurp.php +++ b/fedi_slurp.php @@ -1,178 +1,243 @@ #!/usr/bin/php true, - CURLOPT_HTTPHEADER => [ - "Authorization: Bearer $MASTODON_TOKEN", - "Accept: application/json" - ] -]); - -$bookmarksJson = curl_exec($ch); -$bookmarks = json_decode($bookmarksJson, true); -if (!is_array($bookmarks)) { - die("❌ Failed to parse Mastodon bookmarks.\n"); -} - -echo "Found bookmarks:".count($bookmarks)."\n"; - -//----------------------------- -// FIND VALID URLs in posts -//----------------------------- - -foreach ($bookmarks as $status) { - if (!isset($status['content'])) { - continue; - } - $content = strip_tags($status['content']); - preg_match_all('/https?:\/\/[^\s"<]+/', $content, $matches); - if (!empty($matches[0])) { - - $oneLink = $matches[0][0]; - if (filter_var($oneLink, FILTER_VALIDATE_URL)) { - $links[] = $oneLink; - } else { - // This happens for example if URL has an emoji at the end - echo "INVALID URL: $oneLink\n"; - } - } -} - - -echo "Valid URLS:".count($links)."\n"; - -print_r($links); - - -//----------------------------- -// SEND LINKS TO READECK -//----------------------------- - -$apiUrl = "https://$READECK_HOST/api/bookmarks"; - -$ch = curl_init(); -curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); -curl_setopt($ch, CURLOPT_POST, true); -curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0'); - -$headers = [ - "Authorization: Bearer $READECK_TOKEN", - 'Accept: application/json', - 'Content-Type: application/json' -]; - -$alreadySentDir = __DIR__ . 
"/_already_sent"; - -if (!is_dir($alreadySentDir)) { - mkdir($alreadySentDir, 0755, true); // recursive mkdir -} - -require("add_to_fedilist.php"); - - -foreach ($links as $link) { - -if (isYouTubeLink($link)) { - addVideoToFediList($link); - continue; -} + echo ""; + echo ""; + echo "--------------------------------\n"; + echo "Host: $MASTODON_HOST\n"; + echo "Token: $MASTODON_TOKEN\n"; + echo "--------------------------------\n"; + echo ""; - // READECK will accept several times the same URL ! - // Make sure we don't send it several times by keeping an archive here - $hash = md5($link); - $filePath = __DIR__ . "/_already_sent/{$hash}.txt"; - if (file_exists($filePath)) { - echo "ℹ️ Already sent: $link\n"; - continue; - } + //----------------------------- + // FETCH MASTODON BOOKMARKS + //----------------------------- + echo "# Fetching mastodon / gotosocial / snac bookmarks...\n"; + date_default_timezone_set('America/Sao_Paulo'); + echo date('Y-m-d H:i:s')."\n"; + $ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks"); - $options = [ - 'http' => [ - 'method' => 'GET', - 'header' => "User-Agent: Mozilla/5.0\r\n" - ] -]; + #GotoSocial will reply with error "I am a teapot" if no user agent is sent... + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_USERAGENT => "FediSlurperScript/1.0 (https://code.lema.org/santiago/fedi_slurp)", - // First check if page has content - //$ch = curl_init($link);; - curl_setopt($ch, CURLOPT_URL, $link); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - $content = curl_exec($ch); - - if ($content === false) { - echo "❌ Failed to fetch $link\n"; - continue; - } - $plainText = strip_tags($content); - - if (strlen($plainText) < $MINIMUM_TEXT_SIZE) { - echo "⚠️ Skipping $link\ncontent too small (".strlen($plainText)." chars < $MINIMUM_TEXT_SIZE )\n"; - continue; - } - - echo "🟢 Will add to Readeck $link\nLength: " . 
strlen($plainText)."\n"; - - //not passing title here, since we don't have it - $payload = json_encode([ - "labels" => ["automasto"], - "url" => $link + CURLOPT_HTTPHEADER => [ + "Authorization: Bearer $MASTODON_TOKEN", + "Accept: application/json" + ] ]); - curl_setopt($ch, CURLOPT_URL, $apiUrl); - curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); - curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + $bookmarksJson = curl_exec($ch); + $bookmarks = json_decode($bookmarksJson, true); + if (!is_array($bookmarks)) { + die("❌ Failed to parse Mastodon bookmarks.\n"); + } - $response = curl_exec($ch); - $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + echo "Found bookmarks:".count($bookmarks)."\n"; + #print_r($bookmarks); - if (curl_errno($ch)) { - echo "❌ Error adding $link: " . curl_error($ch) . "\n"; - } else { + //----------------------------- + // FIND VALID URLs in posts + //----------------------------- - // Store already sent file only if connection worked - file_put_contents($filePath, $link); + foreach ($bookmarks as $status) { + if (!isset($status['content'])) { + continue; + } + $content = strip_tags($status['content']); + preg_match_all('/https?:\/\/[^\s"<]+/', $content, $matches); + if (!empty($matches[0])) { - $json = json_decode($response, true); - if (json_last_error() === JSON_ERROR_NONE) { - if ($httpCode >= 200 && $httpCode < 300) { - echo "✅ [$httpCode] Successfully added: $link\n"; + $oneLink = $matches[0][0]; + if (filter_var($oneLink, FILTER_VALIDATE_URL)) { + $links[] = $oneLink; } else { - echo "⚠️ Server returned status $httpCode for $link\n"; + // This happens for example if URL has an emoji at the end + echo "INVALID URL: $oneLink\n"; } - } else { - echo "⚠️ Response is not valid JSON for $link: $response\n"; } } -} + if (isset($links)) { + echo "Valid URLS:".count($links)."\n"; + print_r($links); + } else { + echo "NO links founds. 
Kthxbye \n"; + die(0); -curl_close($ch); + } + + //----------------------------- + // SEND LINKS TO READECK + //----------------------------- + + $apiUrl = "https://$READECK_HOST/api/bookmarks"; + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0'); + + $headers = [ + "Authorization: Bearer $READECK_TOKEN", + 'Accept: application/json', + 'Content-Type: application/json' + ]; + + $alreadySentDir = __DIR__ . "/_already_sent"; + + if (!is_dir($alreadySentDir)) { + mkdir($alreadySentDir, 0755, true); // recursive mkdir + } + + + + foreach ($links as $link) { + + if (isYouTubeLink($link)) { + addVideoToFediList($link); + continue; + } + + + + // READECK will accept several times the same URL ! + // Make sure we don't send it several times by keeping an archive here + $hash = md5($link); + $filePath = __DIR__ . "/_already_sent/{$hash}.txt"; + + if (file_exists($filePath)) { + echo "ℹ️ Already sent: $link\n"; + continue; + } + + + $options = [ + 'http' => [ + 'method' => 'GET', + 'header' => "User-Agent: Mozilla/5.0\r\n" + ] +]; + + // First check if page has content + //$ch = curl_init($link);; + curl_setopt($ch, CURLOPT_URL, $link); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + $content = curl_exec($ch); + + if ($content === false) { + echo "❌ Failed to fetch $link\n"; + continue; + } + $plainText = strip_tags($content); + + if (strlen($plainText) < $MINIMUM_TEXT_SIZE) { + echo "⚠️ Skipping $link\ncontent too small (".strlen($plainText)." chars < $MINIMUM_TEXT_SIZE )\n"; + continue; + } + + echo "🟢 Will add to Readeck $link\nLength: " . 
strlen($plainText)."\n"; + + //not passing title here, since we don't have it + $payload = json_encode([ + "labels" => ["automasto"], + "url" => $link + ]); + + curl_setopt($ch, CURLOPT_URL, $apiUrl); + curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + + $response = curl_exec($ch); + $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + + + if (curl_errno($ch)) { + echo "❌ Error adding $link: " . curl_error($ch) . "\n"; + } else { + + // Store already sent file only if connection worked + file_put_contents($filePath, $link); + + $json = json_decode($response, true); + if (json_last_error() === JSON_ERROR_NONE) { + if ($httpCode >= 200 && $httpCode < 300) { + echo "✅ [$httpCode] Successfully added: $link\n"; + } else { + echo "⚠️ Server returned status $httpCode for $link\n"; + } + } else { + echo "⚠️ Response is not valid JSON for $link: $response\n"; + } + } + + } + + curl_close($ch); + +} // end accounts loop + + +function loadAccounts(string $filepath): array +{ + $accounts = []; + + if (!file_exists($filepath)) { + return $accounts; // empty if file not found + } + + $lines = file($filepath, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + + foreach ($lines as $line) { + $line = trim($line); + if ($line === '') { + continue; + } + + [$host, $token] = explode('|', $line, 2); + $accounts[] = [ + 'host' => $host, + 'token' => $token + ]; + } + + return $accounts; +} diff --git a/update_google_token.php b/update_google_token.php index f183070a..71469ce5 100755 --- a/update_google_token.php +++ b/update_google_token.php @@ -1,6 +1,8 @@ #!/usr/bin/php $clientId, @@ -109,22 +99,22 @@ while (true) { ]); if (isset($tokenResponse['access_token'])) { - $token = $tokenResponse; - echo "Saving token.json\n"; - file_put_contents(__DIR__ . '/_credentials/token.json', json_encode($token)); + $tokenResponse['expires_at'] = time() + $tokenResponse['expires_in']; + $path = __DIR__ . 
'/_credentials/token.json'; + file_put_contents($path, json_encode($tokenResponse)); + echo "\n✅ Token saved as $path.\n"; break; } + if (isset($tokenResponse['error']) && $tokenResponse['error'] !== 'authorization_pending') { - die("Auth error: " . $tokenResponse['error'] . "\n"); + die("\nAuth error: " . $tokenResponse['error'] . "\n"); } if (time() - $startTime > $deviceData['expires_in']) { - die("Authorization timed out.\n"); + die("\nAuthorization timed out.\n"); } } // === Step 3: Create FediList Playlist === -createPlaylist($token['access_token']); - -?> +createPlaylist($tokenResponse['access_token']); diff --git a/utils.php b/utils.php new file mode 100644 index 00000000..39efbd04 --- /dev/null +++ b/utils.php @@ -0,0 +1,21 @@ +