2025-05-21 00:44:19 +00:00
|
|
|
|
#!/usr/bin/php
|
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
//-----------------------------
|
|
|
|
|
// CREDENTIALS
|
|
|
|
|
//-----------------------------
|
|
|
|
|
|
|
|
|
|
// Each credential/host may be overridden through an environment variable of
// the same name, so secrets do not have to live in this file. The literals
// below are kept only as backward-compatible fallbacks.
// NOTE(review): tokens committed to source control should be rotated and the
// fallbacks removed once the environment variables are in place.
$MASTODON_TOKEN = getenv('MASTODON_TOKEN') ?: '8beea62e32b336e5d934d06a21b0b996';
$MASTODON_HOST = getenv('MASTODON_HOST') ?: 'go.lema.org';

$READECK_TOKEN = getenv('READECK_TOKEN') ?: 'LDJb4YbGKe6Fp8cSygpuw5LjmwkgGTAbFbP77TQtYwe1hFZ4';
$READECK_HOST = getenv('READECK_HOST') ?: 'read.lema.org';

// Pages whose tag-stripped body is shorter than this (in bytes, as measured
// by strlen) are skipped instead of being sent to Readeck.
$MINIMUM_TEXT_SIZE = 500;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------
|
|
|
|
|
// FETCH MASTODON BOOKMARKS
|
|
|
|
|
//-----------------------------
|
|
|
|
|
echo "# Fetching mastodon / snac bookmarks...\n";
date_default_timezone_set('America/Sao_Paulo');
echo date('Y-m-d H:i:s')."\n";

// Request the authenticated user's bookmarks from the Mastodon-compatible API.
$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks");
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer $MASTODON_TOKEN",
        "Accept: application/json",
    ],
]);

$bookmarksJson = curl_exec($ch);

// Transport-level failure (DNS, TLS, connection refused, ...): report the
// real cause instead of a misleading "failed to parse" message.
if ($bookmarksJson === false) {
    die("❌ Failed to fetch Mastodon bookmarks: " . curl_error($ch) . "\n");
}

// An error response such as {"error": "..."} is valid JSON and decodes to an
// array, so the HTTP status must be checked before trusting the body.
$bookmarksHttpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($bookmarksHttpCode < 200 || $bookmarksHttpCode >= 300) {
    die("❌ Mastodon bookmarks request returned HTTP $bookmarksHttpCode.\n");
}

$bookmarks = json_decode($bookmarksJson, true);
if (!is_array($bookmarks)) {
    die("❌ Failed to parse Mastodon bookmarks.\n");
}

echo "Found bookmarks:".count($bookmarks)."\n";
|
|
|
|
|
|
|
|
|
|
//-----------------------------
|
|
|
|
|
// FIND VALID URLs in posts
|
|
|
|
|
//-----------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the first http(s) URL found in the visible text of a status.
 *
 * Closing paragraphs and line breaks are turned into spaces before the tags
 * are stripped; otherwise a URL at the end of one paragraph would be glued to
 * the first word of the next. HTML entities are decoded afterwards so that
 * "&amp;" inside a query string becomes "&" again.
 *
 * @param string $html Status content as HTML.
 * @return string|null The first URL-looking token, or null when there is none.
 */
function extractFirstUrl(string $html): ?string
{
    $spaced = preg_replace('/<\s*(?:br\s*\/?|\/\s*p)\s*>/i', ' ', $html);
    if ($spaced === null) {
        // Regex engine failure: fall back to the untouched input.
        $spaced = $html;
    }

    $text = html_entity_decode(strip_tags($spaced), ENT_QUOTES | ENT_HTML5, 'UTF-8');

    if (preg_match('/https?:\/\/[^\s"<]+/', $text, $match) !== 1) {
        return null;
    }

    return $match[0];
}

// Always start from an empty list so the count/print below (and any later
// loop over $links) work even when no bookmark contains a URL.
$links = [];

foreach ($bookmarks as $status) {
    if (!isset($status['content'])) {
        continue;
    }

    // Only the first URL of each post is considered.
    $oneLink = extractFirstUrl($status['content']);
    if ($oneLink === null) {
        continue;
    }

    if (filter_var($oneLink, FILTER_VALIDATE_URL)) {
        $links[] = $oneLink;
    } else {
        // This happens, for example, if the URL has an emoji at the end
        echo "INVALID URL: $oneLink\n";
    }
}

echo "Valid URLS:".count($links)."\n";

print_r($links);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------
|
|
|
|
|
// SEND LINKS TO READECK
|
|
|
|
|
//-----------------------------
|
|
|
|
|
|
|
|
|
|
$apiUrl = "https://$READECK_HOST/api/bookmarks";

// Handle used exclusively for POSTing bookmarks to Readeck. It carries the
// Readeck bearer token, so it must never be pointed at any other host.
$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => $apiUrl,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0',
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer $READECK_TOKEN",
        'Accept: application/json',
        'Content-Type: application/json',
    ],
]);

// Separate handle for downloading the linked pages: plain GET, no auth
// header, no request body. Sharing one handle for both jobs would send the
// Readeck token and the previous JSON payload to arbitrary third-party sites.
$pageCh = curl_init();
curl_setopt_array($pageCh, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPGET => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0',
    // Follow redirects so the article is measured, not the body of a 301.
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS => 5,
    // Keep an unattended run from hanging forever on a dead host.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 30,
]);

// Readeck accepts the same URL several times, so a local archive of
// already-sent links (one file per URL, named by its md5) prevents duplicates.
$alreadySentDir = __DIR__ . "/_already_sent";
if (!is_dir($alreadySentDir) && !mkdir($alreadySentDir, 0755, true) && !is_dir($alreadySentDir)) {
    die("❌ Could not create directory $alreadySentDir\n");
}

require("add_to_fedilist.php");

// `?? []` keeps the loop safe when no link was collected earlier.
foreach ($links ?? [] as $link) {

    // YouTube links go to the fedilist instead of Readeck.
    if (isYouTubeLink($link)) {
        addVideoToFediList($link);
        continue;
    }

    $hash = md5($link);
    $filePath = "$alreadySentDir/{$hash}.txt";

    if (file_exists($filePath)) {
        echo "ℹ️ Already sent: $link\n";
        continue;
    }

    // First check that the page has enough content to be worth saving.
    curl_setopt($pageCh, CURLOPT_URL, $link);
    $content = curl_exec($pageCh);

    if ($content === false) {
        echo "❌ Failed to fetch $link\n";
        continue;
    }

    $plainText = strip_tags($content);
    $plainLength = strlen($plainText);

    if ($plainLength < $MINIMUM_TEXT_SIZE) {
        echo "⚠️ Skipping $link\ncontent too small (".$plainLength." chars < $MINIMUM_TEXT_SIZE )\n";
        continue;
    }

    echo "🟢 Will add to Readeck $link\nLength: " . $plainLength."\n";

    // No title is passed here, since we don't have it.
    $payload = json_encode([
        "labels" => ["automasto"],
        "url" => $link,
    ]);

    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);

    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

    if (curl_errno($ch)) {
        echo "❌ Error adding $link: " . curl_error($ch) . "\n";
        continue;
    }

    $accepted = $httpCode >= 200 && $httpCode < 300;

    // Record the link only when Readeck actually accepted it; a 401/500 must
    // stay retryable on the next run.
    if ($accepted && file_put_contents($filePath, $link) === false) {
        echo "⚠️ Could not record $link as sent in $filePath\n";
    }

    json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        echo "⚠️ Response is not valid JSON for $link: $response\n";
    } elseif ($accepted) {
        echo "✅ [$httpCode] Successfully added: $link\n";
    } else {
        echo "⚠️ Server returned status $httpCode for $link\n";
    }
}

curl_close($pageCh);
curl_close($ch);
|