From 32ca410f4edbff578d71781d943c41573912f476 Mon Sep 17 00:00:00 2001
From: amrkmn
Date: Fri, 26 Dec 2025 22:39:23 +0800
Subject: Initial commit

---
 scripts/cache.ts          | 200 ++++++++++++++++++++++++++++++++++++++++
 scripts/cache/files.ts    | 138 ++++++++++++++++++++++++++
 scripts/cache/lock.ts     | 220 ++++++++++++++++++++++++++++++++++++++++++
 scripts/cache/logger.ts   | 152 ++++++++++++++++++++++++++++
 scripts/cache/manifest.ts |  76 ++++++++++++++
 scripts/cache/metadata.ts |  96 ++++++++++++++++
 scripts/cache/s3.ts       | 117 ++++++++++++++++++++++
 scripts/cache/utils.ts    |  85 +++++++++++++++
 scripts/config.ts         |  20 +++++
 scripts/meilisearch.ts    | 158 +++++++++++++++++++++++++++++
 scripts/types.ts          |  31 +++++++
 scripts/update.ts         | 183 ++++++++++++++++++++++++++++++++
 scripts/worker.ts         |   6 ++
 13 files changed, 1482 insertions(+)
 create mode 100644 scripts/cache.ts
 create mode 100644 scripts/cache/files.ts
 create mode 100644 scripts/cache/lock.ts
 create mode 100644 scripts/cache/logger.ts
 create mode 100644 scripts/cache/manifest.ts
 create mode 100644 scripts/cache/metadata.ts
 create mode 100644 scripts/cache/s3.ts
 create mode 100644 scripts/cache/utils.ts
 create mode 100644 scripts/config.ts
 create mode 100644 scripts/meilisearch.ts
 create mode 100644 scripts/types.ts
 create mode 100644 scripts/update.ts
 create mode 100644 scripts/worker.ts

(limited to 'scripts')

diff --git a/scripts/cache.ts b/scripts/cache.ts
new file mode 100644
index 0000000..f16bed4
--- /dev/null
+++ b/scripts/cache.ts
@@ -0,0 +1,200 @@
+import type { S3Client } from 'bun';
+import { join } from 'path';
+import { cleanupDir, compressToTar, ensureDir, extractTar, validateCache } from './cache/files';
+import { withLock } from './cache/lock';
+import { log } from './cache/logger';
+import { addCacheEntry } from './cache/manifest';
+import { loadMetadata, saveMetadata, updateBothAccessTimes } from './cache/metadata';
+import { cleanupOldCaches, ENABLED, getClient, resolveCacheKey } from './cache/s3';
+import { CACHE_FILE_NAME, TMP_DIR } from './cache/utils';
+
+const CACHE_FILE_PATH = join(TMP_DIR, CACHE_FILE_NAME);
+
+function formatBytes(bytes: number): string {
+    return (bytes / (1024 * 1024)).toFixed(2);
+}
+
+async function downloadCache(s3: S3Client, key: string, targetPath: string): Promise<number> {
+    const s3File = s3.file(key);
+    const stream = s3File.stream();
+    const writer = Bun.file(targetPath).writer();
+
+    const transfer = log.transfer('Received');
+    let downloadedBytes = 0;
+
+    for await (const chunk of stream) {
+        writer.write(chunk);
+        downloadedBytes += chunk.length;
+        transfer.progress(downloadedBytes);
+    }
+    await writer.end();
+
+    transfer.complete(downloadedBytes);
+
+    return downloadedBytes;
+}
+
+async function uploadCache(s3: S3Client, key: string, sourcePath: string): Promise<number> {
+    const cacheFile = Bun.file(sourcePath);
+    const stream = cacheFile.stream();
+
+    const s3File = s3.file(key);
+    const writer = s3File.writer({
+        partSize: 10 * 1024 * 1024, // 10 MB
+        queueSize: 4,
+        retry: 3
+    });
+
+    const timer = log.timer('Uploading cache');
+    let uploadedBytes = 0;
+
+    // Start a timer to log progress every second
+    const progressInterval = setInterval(() => {
+        timer.progress();
+    }, 1000);
+
+    try {
+        for await (const chunk of stream) {
+            writer.write(chunk);
+            uploadedBytes += chunk.length;
+        }
+
+        await writer.end();
+        return uploadedBytes;
+    } finally {
+        clearInterval(progressInterval);
+        timer.complete();
+    }
+}
+
+export async function restoreCache(
+    paths: string[],
+    key: string,
+    restoreKeys?: string[]
+): Promise<string | undefined> {
+    if (!ENABLED) {
+        console.log('R2 Cache disabled');
+        return undefined;
+    }
+
+    const s3 = getClient();
+    if (!s3) return undefined;
+
+    try {
+        // Find matching cache (exact or prefix match)
+        const matchedKey = await resolveCacheKey(s3, key, restoreKeys);
+        if (!matchedKey) {
+            console.log('Cache not found');
+            return undefined;
+        }
+
+        // Check if local cache is still valid
+        const metadata = await loadMetadata(s3, matchedKey);
+        if (metadata && (await validateCache(metadata))) {
+            await updateBothAccessTimes(s3, matchedKey, metadata);
+            return matchedKey;
+        }
+
+        await ensureDir(TMP_DIR);
+
+        // Ensure all target paths exist
+        for (const path of paths) {
+            await ensureDir(path);
+        }
+
+        console.log(`Downloading cache from key: ${matchedKey}`);
+        const startTime = Date.now();
+
+        const downloadedBytes = await downloadCache(s3, matchedKey, CACHE_FILE_PATH);
+
+        const downloadTime = Date.now() - startTime;
+        const sizeInMB = formatBytes(downloadedBytes);
+
+        console.log(`Cache Size: ~${sizeInMB} MB (${downloadedBytes} B)`);
+        console.log(`Cache downloaded in ${(downloadTime / 1000).toFixed(2)}s`);
+
+        console.log('Extracting cache...');
+        const extractStartTime = Date.now();
+        await extractTar(CACHE_FILE_PATH);
+        const extractTime = Date.now() - extractStartTime;
+        console.log(`Cache extracted in ${(extractTime / 1000).toFixed(2)}s`);
+
+        await cleanupDir(TMP_DIR);
+
+        // Update access time after successful restore
+        const newMetadata = await loadMetadata(s3, matchedKey);
+        if (newMetadata) {
+            await updateBothAccessTimes(s3, matchedKey, newMetadata);
+        }
+
+        console.log(`Cache restored successfully`);
+        return matchedKey;
+    } catch (e) {
+        console.error('Failed to restore cache:', e);
+        return undefined;
+    }
+}
+
+export async function saveCache(paths: string[], key: string): Promise<void> {
+    if (!ENABLED) return;
+
+    const s3 = getClient();
+    if (!s3) return;
+
+    // Use withLock for automatic lock management with renewal
+    const result = await withLock(s3, async () => {
+        // Check if cache already exists before compressing
+        const cacheFile = s3.file(key);
+        if (await cacheFile.exists()) {
+            console.log(`Cache already exists: ${key}, skipping upload`);
+            return;
+        }
+
+        await ensureDir(TMP_DIR);
+
+        // Compress and calculate checksums
+        console.log('Compressing cache...');
+        const compressStartTime = Date.now();
+        const files = await compressToTar(paths, CACHE_FILE_PATH);
+        const compressTime = Date.now() - compressStartTime;
+        console.log(`Cache compressed in ${(compressTime / 1000).toFixed(2)}s`);
+
+        const cache = Bun.file(CACHE_FILE_PATH);
+        const sizeInBytes = cache.size;
+        const sizeInMB = formatBytes(sizeInBytes);
+
+        console.log(`Cache Size: ~${sizeInMB} MB (${sizeInBytes} B)`);
+        console.log(`Uploading cache to key: ${key}`);
+
+        const startTime = Date.now();
+
+        await uploadCache(s3, key, CACHE_FILE_PATH);
+
+        const uploadTime = Date.now() - startTime;
+        const uploadSpeed = sizeInBytes / (1024 * 1024) / (uploadTime / 1000);
+        console.log(
+            `Cache uploaded in ${(uploadTime / 1000).toFixed(2)}s (${uploadSpeed.toFixed(2)} MB/s)`
+        );
+
+        const timestamp = Date.now();
+
+        // Save metadata and get hash
+        const hash = await saveMetadata(s3, key, files, CACHE_FILE_PATH);
+        await cleanupDir(TMP_DIR);
+
+        // Add entry to manifest
+        await addCacheEntry(s3, key, hash, timestamp);
+
+        console.log(`Cache saved successfully`);
+
+        // Extract prefix for cleanup (e.g., "extensions-abc.tzst" -> "extensions-")
+        const prefix = key.split('-')[0] + '-';
+        await cleanupOldCaches(s3, prefix);
+
+        return;
+    });
+
+    if (result === null) {
+        console.error('Failed to acquire lock for cache save');
+    }
+}
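For context, scripts/update.ts (further down in this patch) drives these two entry points around its sync step. A minimal caller, sketched under the assumption that the helpers from scripts/cache/utils.ts are used as-is (populateStatic is hypothetical):

    import { restoreCache, saveCache } from './cache';
    import { CACHE_PATHS, CACHE_RESTORE_KEYS, generateCacheKey } from './cache/utils';

    declare function populateStatic(): Promise<void>; // hypothetical build step

    const key = await generateCacheKey();
    // Try the exact key first, then fall back to the newest prefix match
    const hit = await restoreCache(CACHE_PATHS, key, CACHE_RESTORE_KEYS);
    if (!hit) {
        await populateStatic(); // rebuild static/ from scratch
    }
    // Safe to call unconditionally: saveCache skips the upload if the key already exists
    await saveCache(CACHE_PATHS, key);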
diff --git a/scripts/cache/files.ts b/scripts/cache/files.ts
new file mode 100644
index 0000000..e2bdf8b
--- /dev/null
+++ b/scripts/cache/files.ts
@@ -0,0 +1,138 @@
+import { $ } from 'bun';
+import { mkdir, readdir, rm, exists } from 'fs/promises';
+import { join, relative, sep } from 'path';
+import type { CacheMetadata, FileMetadata } from './utils';
+
+export async function calculateFileChecksum(filePath: string): Promise<string> {
+    const fileBlob = Bun.file(filePath);
+    const size = fileBlob.size;
+
+    const hasher = new Bun.CryptoHasher('sha256');
+    if (size <= 10 * 1024 * 1024 /** 10MB */)
+        return hasher.update(await fileBlob.arrayBuffer()).digest('hex');
+
+    const reader = fileBlob.stream().getReader();
+    while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        if (value) hasher.update(value);
+    }
+
+    return hasher.digest('hex');
+}
+
+export async function calculateDirectoryChecksums(
+    paths: string[]
+): Promise<Record<string, FileMetadata>> {
+    const files: Record<string, FileMetadata> = {};
+
+    for (const path of paths) {
+        const entries = await readdir(path, {
+            recursive: true,
+            withFileTypes: true
+        });
+
+        await Promise.all(
+            entries
+                .filter((entry) => entry.isFile())
+                .map(async (entry) => {
+                    const fullPath = join(entry.parentPath, entry.name);
+                    const relativePath = relative('.', fullPath).split(sep).join('/');
+
+                    const size = Bun.file(fullPath).size;
+                    const checksum = await calculateFileChecksum(fullPath);
+
+                    files[relativePath] = { checksum, size };
+                })
+        );
+    }
+
+    return files;
+}
+
+export async function validateCache(metadata: CacheMetadata): Promise<boolean> {
+    console.log('Validating cache...');
+    let valid = 0;
+    let invalid = 0;
+    let missing = 0;
+
+    const totalFiles = Object.keys(metadata.files).length;
+
+    for (const [filePath, fileInfo] of Object.entries(metadata.files)) {
+        const fullPath = join('.', filePath);
+
+        if (!(await exists(fullPath))) {
+            missing++;
+            continue;
+        }
+
+        try {
+            const actualChecksum = await calculateFileChecksum(fullPath);
+            if (actualChecksum === fileInfo.checksum) valid++;
+            else invalid++;
+        } catch (e) {
+            invalid++;
+        }
+    }
+
+    const isValid = invalid === 0 && missing === 0;
+
+    if (isValid) {
+        console.log(`Cache is valid: ${valid} files matched`);
+    } else {
+        console.log(
+            `Cache validation failed: ${valid} valid, ${invalid} invalid, ${missing} missing (total: ${totalFiles})`
+        );
+    }
+
+    return isValid;
+}
+
+export async function extractTar(tarPath: string): Promise<void> {
+    const compressedData = await Bun.file(tarPath).arrayBuffer();
+    const decompressed = Bun.zstdDecompressSync(new Uint8Array(compressedData));
+
+    // Write decompressed tar to temp file
+    const tempTarPath = tarPath + '.tmp';
+    await Bun.write(tempTarPath, decompressed);
+
+    await $`tar -xf ${tempTarPath}`.quiet().finally(async () => {
+        await rm(tempTarPath).catch(() => {});
+    });
+}
+
+export async function compressToTar(
+    paths: string[],
+    outputPath: string
+): Promise<Record<string, FileMetadata>> {
+    const checksums = await calculateDirectoryChecksums(paths);
+
+    const tempTarPath = outputPath + '.tmp';
+    await $`tar -cf ${tempTarPath} ${paths}`.quiet();
+
+    try {
+        const tarData = await Bun.file(tempTarPath).arrayBuffer();
+        const compressed = Bun.zstdCompressSync(new Uint8Array(tarData));
+        await Bun.write(outputPath, compressed);
+    } finally {
+        await rm(tempTarPath).catch(() => {});
+    }
+
+    return checksums;
+}
+
+export async function ensureDir(dir: string): Promise<void> {
+    if (!(await exists(dir))) {
+        await mkdir(dir, { recursive: true });
+    }
+}
+
+export async function cleanupDir(dir: string): Promise<void> {
+    try {
+        await rm(dir, { recursive: true, force: true });
+    } catch (e: any) {
+        if (e.code !== 'EBUSY' && e.code !== 'ENOTEMPTY') {
+            throw e;
+        }
+    }
+}
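compressToTar and extractTar are symmetric: the former hashes every file, tars the given paths, and zstd-compresses the archive; the latter reverses the process into the working directory. The returned checksums are what validateCache compares against on the next run. A rough round-trip sketch (paths illustrative):

    import { compressToTar, extractTar } from './cache/files';

    const files = await compressToTar(['static'], 'tmp/extensions-cache.tzst');
    console.log(`Hashed ${Object.keys(files).length} files`);

    // Later, possibly on another machine:
    await extractTar('tmp/extensions-cache.tzst');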
diff --git a/scripts/cache/lock.ts b/scripts/cache/lock.ts
new file mode 100644
index 0000000..674f4be
--- /dev/null
+++ b/scripts/cache/lock.ts
@@ -0,0 +1,220 @@
+import type { S3Client } from 'bun';
+import { hostname } from 'os';
+import type { CacheLock } from './utils';
+import {
+    LOCK_DOUBLE_CHECK_MS,
+    LOCK_KEY,
+    LOCK_MAX_RETRIES,
+    LOCK_RETRY_MAX_MS,
+    LOCK_RETRY_START_MS,
+    LOCK_TIMEOUT_MS,
+    writeJsonToS3
+} from './utils';
+
+export function generateInstanceId(): string {
+    return `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
+}
+
+/**
+ * Checks if a process is still running (only works on same machine).
+ * Uses Node.js process.kill(pid, 0) which doesn't actually kill but checks existence.
+ */
+function isProcessRunning(pid: number): boolean {
+    try {
+        // Signal 0 doesn't kill, just checks if process exists
+        process.kill(pid, 0);
+        return true;
+    } catch {
+        return false;
+    }
+}
+
+/**
+ * Checks if a lock is stale based on Restic's algorithm:
+ * 1. If timestamp is older than LOCK_TIMEOUT_MS (30 min), it's stale
+ * 2. If on same machine and process doesn't exist, it's stale
+ */
+function isLockStale(lock: CacheLock): boolean {
+    const lockAge = Date.now() - lock.timestamp;
+    const timeSinceRenewal = lock.renewedAt ? Date.now() - lock.renewedAt : lockAge;
+
+    // Check 1: Timestamp-based staleness (30 minutes)
+    if (timeSinceRenewal > LOCK_TIMEOUT_MS) {
+        return true;
+    }
+
+    // Check 2: Process-based staleness (only on same machine)
+    if (lock.hostname === hostname()) {
+        if (!isProcessRunning(lock.pid)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * Acquires a distributed lock using Restic's double-check pattern.
+ * This is simpler and more reliable than the lease-based approach.
+ *
+ * Algorithm (inspired by Restic):
+ * 1. Check for existing locks
+ * 2. If lock exists and is NOT stale, retry with exponential backoff
+ * 3. If no lock or stale lock found, create our lock
+ * 4. Wait 200ms (LOCK_DOUBLE_CHECK_MS)
+ * 5. Re-check: verify we still own the lock
+ * 6. If verification fails, we lost the race - retry
+ */
+export async function acquireLock(s3: S3Client, instanceId: string): Promise<string | null> {
+    const lockFile = s3.file(LOCK_KEY);
+    let retryDelay = LOCK_RETRY_START_MS;
+
+    for (let attempt = 0; attempt < LOCK_MAX_RETRIES; attempt++) {
+        try {
+            // Step 1: Check for existing lock
+            if (await lockFile.exists()) {
+                const lockContent = await lockFile.text();
+                const existingLock: CacheLock = JSON.parse(lockContent);
+
+                // Check if lock is stale
+                if (isLockStale(existingLock)) {
+                    console.log('Stale lock detected, removing...');
+                    await lockFile.delete().catch(() => {});
+                } else {
+                    // Lock is valid, need to retry
+                    console.log(
+                        `Lock busy, retrying in ${retryDelay / 1000}s (${attempt + 1}/${LOCK_MAX_RETRIES})...`
+                    );
+                    await Bun.sleep(retryDelay);
+
+                    // Exponential backoff: double delay each time, up to max
+                    retryDelay = Math.min(retryDelay * 2, LOCK_RETRY_MAX_MS);
+                    continue;
+                }
+            }
+
+            // Step 2: Create new lock
+            const newLock: CacheLock = {
+                locked: true,
+                timestamp: Date.now(),
+                instance: instanceId,
+                ttl: LOCK_TIMEOUT_MS,
+                renewedAt: Date.now(),
+                pid: process.pid,
+                hostname: hostname()
+            };
+
+            await writeJsonToS3(s3, LOCK_KEY, newLock);
+
+            // Step 3: Wait for double-check delay (Restic's waitBeforeLockCheck pattern)
+            // This allows any racing processes to also write their locks
+            await Bun.sleep(LOCK_DOUBLE_CHECK_MS);
+
+            // Step 4: Verify we still own the lock (detect race conditions)
+            if (await lockFile.exists()) {
+                const verifyContent = await lockFile.text();
+                const verifyLock: CacheLock = JSON.parse(verifyContent);
+
+                if (verifyLock.instance === instanceId) {
+                    // Successfully acquired lock
+                    console.log('Lock acquired');
+                    return instanceId;
+                }
+            }
+
+            // Lost the race - another process overwrote our lock
+            // Retry with exponential backoff
+            console.log(`Lost lock race, retrying in ${retryDelay / 1000}s...`);
+            await Bun.sleep(retryDelay);
+            retryDelay = Math.min(retryDelay * 2, LOCK_RETRY_MAX_MS);
+        } catch (e) {
+            console.error(`Lock error: ${e}`);
+            await Bun.sleep(retryDelay);
+            retryDelay = Math.min(retryDelay * 2, LOCK_RETRY_MAX_MS);
+        }
+    }
+
+    console.error('Failed to acquire lock');
+    return null;
+}
+
+/**
+ * Renews the lock to extend its TTL. Should be called periodically during long operations.
+ */
+export async function renewLock(s3: S3Client, instanceId: string): Promise<boolean> {
+    const lockFile = s3.file(LOCK_KEY);
+
+    try {
+        if (!(await lockFile.exists())) {
+            return false;
+        }
+
+        const lock: CacheLock = JSON.parse(await lockFile.text());
+
+        if (lock.instance !== instanceId) {
+            return false;
+        }
+
+        // Update renewal time
+        lock.renewedAt = Date.now();
+        await writeJsonToS3(s3, LOCK_KEY, lock);
+
+        console.log('Lock renewed');
+        return true;
+    } catch (e) {
+        console.error('Failed to renew lock:', e);
+        return false;
+    }
+}
+
+/**
+ * Releases the lock if owned by this instance.
+ */
+export async function releaseLock(s3: S3Client, instanceId: string): Promise<void> {
+    const lockFile = s3.file(LOCK_KEY);
+
+    try {
+        if (!(await lockFile.exists())) {
+            return;
+        }
+
+        const lock: CacheLock = JSON.parse(await lockFile.text());
+
+        if (lock.instance === instanceId) {
+            await lockFile.delete();
+            console.log('Lock released');
+        }
+    } catch (e) {
+        console.error('Failed to release lock:', e);
+    }
+}
+
+/**
+ * Executes a callback while holding the lock, with automatic renewal.
+ */
+export async function withLock<T>(
+    s3: S3Client,
+    callback: (instanceId: string) => Promise<T>
+): Promise<T | null> {
+    const instanceId = generateInstanceId();
+    const lockToken = await acquireLock(s3, instanceId);
+
+    if (!lockToken) {
+        return null;
+    }
+
+    // Setup automatic lock renewal every 2 minutes
+    const renewalInterval = setInterval(
+        async () => {
+            await renewLock(s3, instanceId);
+        },
+        2 * 60 * 1000
+    );
+
+    try {
+        return await callback(instanceId);
+    } finally {
+        clearInterval(renewalInterval);
+        await releaseLock(s3, instanceId);
+    }
+}
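Callers are expected to go through withLock rather than juggling acquire/renew/release themselves; saveCache in scripts/cache.ts does exactly that. A minimal sketch, assuming s3 is an already-configured S3Client and doExclusiveWork is a hypothetical critical section:

    import type { S3Client } from 'bun';
    import { withLock } from './cache/lock';

    declare const s3: S3Client; // assumed: configured elsewhere
    declare function doExclusiveWork(instanceId: string): Promise<string>; // hypothetical

    const result = await withLock(s3, async (instanceId) => {
        // The lock is renewed every 2 minutes for as long as this callback runs
        return await doExclusiveWork(instanceId);
    });
    if (result === null) console.error('Could not acquire lock');

One caveat of this shape: null is reserved for lock-acquisition failure, so a callback that legitimately resolves to null is indistinguishable from a failed lock.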
diff --git a/scripts/cache/logger.ts b/scripts/cache/logger.ts
new file mode 100644
index 0000000..1743370
--- /dev/null
+++ b/scripts/cache/logger.ts
@@ -0,0 +1,152 @@
+/**
+ * Checks if the current environment supports interactive terminal features
+ * like carriage return (\r) for progress updates.
+ *
+ * Returns false for:
+ * - Non-TTY environments (CI/CD logs, file redirects)
+ * - Dumb terminals
+ * - Environments without cursor control support
+ */
+function isInteractiveTerminal(): boolean {
+    // Check if stdout is a TTY (interactive terminal)
+    if (!process.stdout.isTTY) return false;
+    // Check for dumb terminal
+    if (process.env.TERM === 'dumb') return false;
+    // Check for CI environments (most set CI=true)
+    if (process.env.CI === 'true' || process.env.CI === '1') return false;
+
+    // Check for common CI environment variables
+    const ciEnvVars = [
+        'GITHUB_ACTIONS',
+        'GITLAB_CI',
+        'CIRCLECI',
+        'TRAVIS',
+        'JENKINS_HOME',
+        'BUILDKITE',
+        'DRONE',
+        'RENDER', // Render.com
+        'CF_PAGES', // Cloudflare Pages
+        'VERCEL' // Vercel
+    ];
+
+    for (const envVar of ciEnvVars) {
+        if (process.env[envVar]) return false;
+    }
+
+    return true;
+}
+
+/**
+ * Formats transfer statistics (size and speed).
+ */
+function formatTransferStats(bytes: number, elapsedSeconds: number): string {
+    const sizeMB = (bytes / (1024 * 1024)).toFixed(2);
+    const speedMBps = (bytes / (1024 * 1024) / elapsedSeconds).toFixed(2);
+    return `${sizeMB} MB (${speedMBps} MB/s)`;
+}
+
+class TimerLogger {
+    private isInteractive: boolean;
+    private startTime: number;
+    private lastLogTime: number;
+    private prefix: string;
+
+    constructor(prefix: string) {
+        this.isInteractive = isInteractiveTerminal();
+        this.startTime = Date.now();
+        this.lastLogTime = this.startTime;
+        this.prefix = prefix;
+    }
+
+    /**
+     * Logs timer progress at regular intervals (throttled to 1 second).
+     */
+    progress(): this {
+        const now = Date.now();
+        if (now - this.lastLogTime >= 1000) {
+            const elapsed = (now - this.startTime) / 1000;
+            const message = `${this.prefix} (${elapsed.toFixed(0)}s)...`;
+
+            if (this.isInteractive) process.stdout.write(`\r${message}`);
+            else console.log(message);
+
+            this.lastLogTime = now;
+        }
+        return this;
+    }
+
+    /**
+     * Logs final timer completion message.
+     */
+    complete(): void {
+        const elapsed = (Date.now() - this.startTime) / 1000;
+        const message = `${this.prefix} (${elapsed.toFixed(0)}s)`;
+
+        if (this.isInteractive) process.stdout.write(`\r\x1b[K${message}\n`);
+        else console.log(message);
+    }
+}
+
+class TransferLogger {
+    private isInteractive: boolean;
+    private startTime: number;
+    private lastLogTime: number;
+    private prefix: string;
+
+    constructor(prefix: string) {
+        this.isInteractive = isInteractiveTerminal();
+        this.startTime = Date.now();
+        this.lastLogTime = this.startTime;
+        this.prefix = prefix;
+    }
+
+    /**
+     * Logs transfer progress at regular intervals (throttled to 1 second).
+     */
+    progress(bytes: number): this {
+        const now = Date.now();
+        if (now - this.lastLogTime >= 1000) {
+            const elapsed = (now - this.startTime) / 1000;
+            const message = `${this.prefix} ${formatTransferStats(bytes, elapsed)}...`;
+
+            if (this.isInteractive) process.stdout.write(`\r${message}`);
+            else console.log(message);
+
+            this.lastLogTime = now;
+        }
+        return this;
+    }
+
+    /**
+     * Logs final transfer completion message.
+     */
+    complete(bytes: number): void {
+        if (bytes > 0) {
+            const elapsed = (Date.now() - this.startTime) / 1000;
+            const message = `${this.prefix} ${formatTransferStats(bytes, elapsed)}`;
+
+            if (this.isInteractive) process.stdout.write(`\r\x1b[K${message}\n`);
+            else console.log(message);
+        }
+    }
+}
+
+class Logger {
+    /**
+     * Creates a timer progress logger.
+     * Usage: log.timer('Uploading cache').progress().complete()
+     */
+    timer(prefix: string): TimerLogger {
+        return new TimerLogger(prefix);
+    }
+
+    /**
+     * Creates a transfer progress logger.
+     * Usage: log.transfer('Received').progress(bytes).complete(bytes)
+     */
+    transfer(prefix: string): TransferLogger {
+        return new TransferLogger(prefix);
+    }
+}
+
+export const log = new Logger();
diff --git a/scripts/cache/manifest.ts b/scripts/cache/manifest.ts
new file mode 100644
index 0000000..7558047
--- /dev/null
+++ b/scripts/cache/manifest.ts
@@ -0,0 +1,76 @@
+import type { S3Client } from 'bun';
+import type { CacheEntry, CacheManifest } from './utils';
+import { writeJsonToS3 } from './utils';
+
+const MANIFEST_KEY = 'manifest.json';
+const MANIFEST_VERSION = 1;
+
+export async function loadManifest(s3: S3Client): Promise<CacheManifest> {
+    const manifestFile = s3.file(MANIFEST_KEY);
+
+    try {
+        if (await manifestFile.exists()) {
+            const data = await manifestFile.text();
+            const manifest: CacheManifest = JSON.parse(data);
+
+            if (manifest.version === MANIFEST_VERSION) {
+                return manifest;
+            }
+        }
+    } catch (e) {
+        console.error('Failed to load manifest:', e);
+    }
+
+    // Return empty manifest if not found or invalid
+    return {
+        version: MANIFEST_VERSION,
+        caches: []
+    };
+}
+
+export async function saveManifest(s3: S3Client, manifest: CacheManifest): Promise<void> {
+    await writeJsonToS3(s3, MANIFEST_KEY, manifest);
+}
+
+export async function addCacheEntry(
+    s3: S3Client,
+    key: string,
+    hash: string,
+    timestamp: number
+): Promise<void> {
+    const manifest = await loadManifest(s3);
+
+    // Remove existing entry with same key if exists
+    manifest.caches = manifest.caches.filter((entry) => entry.key !== key);
+
+    // Add new entry
+    manifest.caches.push({
+        key,
+        hash,
+        timestamp,
+        lastAccessed: timestamp
+    });
+
+    await saveManifest(s3, manifest);
+}
+
+export async function removeCacheEntry(s3: S3Client, key: string): Promise<void> {
+    const manifest = await loadManifest(s3);
+    manifest.caches = manifest.caches.filter((entry) => entry.key !== key);
+    await saveManifest(s3, manifest);
+}
+
+export function findCacheByKey(manifest: CacheManifest, key: string): CacheEntry | null {
+    return manifest.caches.find((entry) => entry.key === key) || null;
+}
+
+export function findCacheByPrefix(manifest: CacheManifest, prefix: string): CacheEntry | null {
+    const matching = manifest.caches.filter((entry) => entry.key.startsWith(prefix));
+
+    if (matching.length === 0) {
+        return null;
+    }
+
+    // Return most recently created cache
+    return matching.sort((a, b) => b.timestamp - a.timestamp)[0];
+}
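For reference, the object persisted at manifest.json is just the CacheManifest shape from scripts/cache/utils.ts; a populated one looks roughly like this (all values illustrative):

    import type { CacheManifest } from './cache/utils';

    const example: CacheManifest = {
        version: 1,
        caches: [
            {
                key: 'extensions-3f2a9b.tzst', // prefix + content hash of extensions.json
                hash: 'e3b0c44298fc1c14...', // sha256 of the compressed archive (truncated)
                timestamp: 1766760000000, // creation time, ms since epoch
                lastAccessed: 1766760000000 // bumped on every restore
            }
        ]
    };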
diff --git a/scripts/cache/metadata.ts b/scripts/cache/metadata.ts
new file mode 100644
index 0000000..2326d08
--- /dev/null
+++ b/scripts/cache/metadata.ts
@@ -0,0 +1,96 @@
+import type { S3Client } from 'bun';
+import { METADATA_VERSION, writeJsonToS3 } from './utils';
+import type { CacheMetadata, FileMetadata } from './utils';
+
+function getMetadataKey(cacheKey: string): string {
+    return `${cacheKey}.meta.json`;
+}
+
+export async function saveMetadata(
+    s3: S3Client,
+    key: string,
+    files: Record<string, FileMetadata>,
+    cacheFilePath: string
+): Promise<string> {
+    const content = await Bun.file(cacheFilePath).arrayBuffer();
+    const hash = new Bun.CryptoHasher('sha256').update(content).digest('hex');
+
+    const metadata: CacheMetadata = {
+        key,
+        hash,
+        timestamp: Date.now(),
+        lastAccessed: Date.now(),
+        files,
+        version: METADATA_VERSION
+    };
+
+    const metadataKey = getMetadataKey(key);
+    await writeJsonToS3(s3, metadataKey, metadata);
+
+    console.log(`Metadata saved: ${metadataKey}`);
+    return hash;
+}
+
+export async function loadMetadata(s3: S3Client, cacheKey: string): Promise<CacheMetadata | null> {
+    const metadataKey = getMetadataKey(cacheKey);
+    const metadataFile = s3.file(metadataKey);
+
+    try {
+        if (!(await metadataFile.exists())) {
+            return null;
+        }
+
+        const metadata: CacheMetadata = JSON.parse(await metadataFile.text());
+
+        if (metadata.version !== METADATA_VERSION) {
+            return null;
+        }
+
+        return metadata;
+    } catch (e) {
+        console.error('Failed to load metadata:', e);
+        return null;
+    }
+}
+
+async function updateMetadataAccessTime(
+    s3: S3Client,
+    cacheKey: string,
+    metadata: CacheMetadata
+): Promise<void> {
+    metadata.lastAccessed = Date.now();
+
+    const metadataKey = getMetadataKey(cacheKey);
+    await writeJsonToS3(s3, metadataKey, metadata);
+}
+
+export async function updateBothAccessTimes(
+    s3: S3Client,
+    cacheKey: string,
+    metadata: CacheMetadata
+): Promise<void> {
+    await updateMetadataAccessTime(s3, cacheKey, metadata);
+
+    // Also update manifest
+    const { loadManifest, saveManifest } = await import('./manifest');
+    const manifest = await loadManifest(s3);
+    const entry = manifest.caches.find((e) => e.key === cacheKey);
+
+    if (entry) {
+        entry.lastAccessed = Date.now();
+        await saveManifest(s3, manifest);
+    }
+}
+
+export async function deleteMetadata(s3: S3Client, cacheKey: string): Promise<void> {
+    const metadataKey = getMetadataKey(cacheKey);
+    const metadataFile = s3.file(metadataKey);
+
+    try {
+        if (await metadataFile.exists()) {
+            await metadataFile.delete();
+        }
+    } catch (e) {
+        console.error(`Failed to delete metadata: ${e}`);
+    }
+}
diff --git a/scripts/cache/s3.ts b/scripts/cache/s3.ts
new file mode 100644
index 0000000..ffbb6ac
--- /dev/null
+++ b/scripts/cache/s3.ts
@@ -0,0 +1,117 @@
+import { S3Client } from 'bun';
+import { MAX_CACHE_AGE_DAYS, MAX_CACHE_FILES } from './utils';
+import { findCacheByKey, findCacheByPrefix, loadManifest, removeCacheEntry } from './manifest';
+import { deleteMetadata } from './metadata';
+
+const s3Config = {
+    ENDPOINT: process.env.S3_ENDPOINT,
+    ACCESS_KEY_ID: process.env.S3_ACCESS_KEY_ID,
+    SECRET_ACCESS_KEY: process.env.S3_SECRET_ACCESS_KEY,
+    BUCKET_NAME: process.env.S3_BUCKET_NAME,
+    REGION: process.env.S3_REGION
+};
+
+export const ENABLED =
+    !!s3Config.ENDPOINT &&
+    !!s3Config.ACCESS_KEY_ID &&
+    !!s3Config.SECRET_ACCESS_KEY &&
+    !!s3Config.BUCKET_NAME;
+
+let client: S3Client | null = null;
+
+export function getClient(): S3Client | null {
+    if (!ENABLED || client) return client;
+
+    client = new S3Client({
+        endpoint: s3Config.ENDPOINT,
+        accessKeyId: s3Config.ACCESS_KEY_ID,
+        secretAccessKey: s3Config.SECRET_ACCESS_KEY,
+        bucket: s3Config.BUCKET_NAME,
+        region: s3Config.REGION
+    });
+    return client;
+}
+
+const cacheExists = async (s3: S3Client, key: string) =>
+    await s3
+        .file(key)
+        .exists()
+        .catch(() => false);
+
+const cleanupStaleCache = async (s3: S3Client, key: string): Promise<void> => {
+    console.log(`Cleaning stale cache from manifest (cache missing): ${key}`);
+    await deleteMetadata(s3, key);
+    await removeCacheEntry(s3, key);
+};
+
+export async function resolveCacheKey(
+    s3: S3Client,
+    key: string,
+    restoreKeys?: string[]
+): Promise<string | null> {
+    const manifest = await loadManifest(s3);
+
+    // Try exact match first
+    const exactMatch = findCacheByKey(manifest, key);
+    if (exactMatch) {
+        if (await cacheExists(s3, exactMatch.key)) {
+            return exactMatch.key;
+        }
+        await cleanupStaleCache(s3, exactMatch.key);
+    }
+
+    // Try restore keys in order (prefix matching), preferring most recent
+    if (restoreKeys && restoreKeys.length > 0) {
+        for (const prefix of restoreKeys) {
+            const match = findCacheByPrefix(manifest, prefix);
+            if (match) {
+                if (await cacheExists(s3, match.key)) {
+                    return match.key;
+                }
+                await cleanupStaleCache(s3, match.key);
+            }
+        }
+    }
+
+    return null;
+}
+
+export async function cleanupOldCaches(s3: S3Client, prefix: string): Promise<void> {
+    const manifest = await loadManifest(s3);
+
+    // Filter caches by prefix
+    const filesWithMetadata = manifest.caches
+        .filter((entry) => entry.key.startsWith(prefix))
+        .map((entry) => ({
+            key: entry.key,
+            lastAccessed: entry.lastAccessed,
+            timestamp: entry.timestamp
+        }));
+
+    // Sort by lastAccessed (most recently accessed first)
+    const files = filesWithMetadata.sort((a, b) => b.lastAccessed - a.lastAccessed);
+
+    const now = Date.now();
+    const maxAge = MAX_CACHE_AGE_DAYS * 24 * 60 * 60 * 1000;
+    let manifestUpdated = false;
+
+    for (let i = 0; i < files.length; i++) {
+        const entry = files[i];
+        const age = now - entry.lastAccessed;
+        const shouldDelete = i >= MAX_CACHE_FILES || age > maxAge;
+
+        if (shouldDelete) {
+            console.log(
+                `Deleting cache: ${entry.key} (age: ${Math.floor(age / (24 * 60 * 60 * 1000))} days, position: ${i + 1})`
+            );
+            await s3.file(entry.key).delete();
+            await deleteMetadata(s3, entry.key);
+            await removeCacheEntry(s3, entry.key);
+            manifestUpdated = true;
+        }
+    }
+
+    if (manifestUpdated) {
+        console.log('Manifest updated after cleanup');
+    }
+}
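The client is configured entirely from the environment: if any required variable is missing, ENABLED stays false and getClient() returns null, so the whole cache layer degrades to a no-op instead of failing the build. A minimal guard sketch:

    import { ENABLED, getClient } from './cache/s3';

    // Required: S3_ENDPOINT, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_BUCKET_NAME
    // Optional: S3_REGION
    const s3 = getClient();
    if (!ENABLED || !s3) {
        console.log('R2 cache disabled, continuing without it');
    }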
diff --git a/scripts/cache/utils.ts b/scripts/cache/utils.ts
new file mode 100644
index 0000000..c095000
--- /dev/null
+++ b/scripts/cache/utils.ts
@@ -0,0 +1,85 @@
+import type { S3Client } from 'bun';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export interface FileMetadata {
+    checksum: string;
+    size: number;
+}
+
+export interface CacheMetadata {
+    key: string;
+    hash: string;
+    timestamp: number;
+    lastAccessed: number;
+    files: Record<string, FileMetadata>;
+    version: number;
+}
+
+export interface CacheLock {
+    locked: boolean;
+    timestamp: number;
+    instance: string;
+    ttl: number;
+    renewedAt?: number;
+    pid: number;
+    hostname: string;
+}
+
+export interface S3ListObject {
+    key: string;
+    lastModified?: string;
+}
+
+export interface CacheEntry {
+    key: string;
+    timestamp: number;
+    lastAccessed: number;
+    hash: string;
+}
+
+export interface CacheManifest {
+    version: number;
+    caches: CacheEntry[];
+}
+
+// ============================================================================
+// Constants
+// ============================================================================
+
+export const LOCK_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes (matches Restic)
+export const LOCK_RETRY_START_MS = 5000; // 5 seconds (initial retry delay)
+export const LOCK_RETRY_MAX_MS = 60000; // 60 seconds (max retry delay)
+export const LOCK_MAX_RETRIES = 6; // With exponential backoff: 5s, 10s, 20s, 40s, 60s, 60s
+export const LOCK_DOUBLE_CHECK_MS = 200; // 200ms delay for double-check pattern (matches Restic)
+
+export const METADATA_VERSION = 1;
+export const METADATA_KEY = 'metadata.json';
+export const LOCK_KEY = 'cache.lock';
+
+export const MAX_CACHE_FILES = 7;
+export const MAX_CACHE_AGE_DAYS = 7;
+
+export const TMP_DIR = 'tmp';
+export const STATIC_DIR = 'static';
+export const CACHE_FILE_NAME = 'extensions-cache.tzst';
+
+// Cache configuration
+export const CACHE_PATHS = ['static'];
+export const CACHE_KEY_PREFIX = 'extensions-';
+export const CACHE_RESTORE_KEYS = ['extensions-'];
+export const EXTENSIONS_CONFIG_FILE = 'extensions.json';
+
+// Helper to generate cache key from extensions.json
+export async function generateCacheKey(): Promise<string> {
+    const content = await Bun.file(EXTENSIONS_CONFIG_FILE).arrayBuffer();
+    const hash = new Bun.CryptoHasher('sha256').update(content).digest('hex');
+    return `${CACHE_KEY_PREFIX}${hash}.tzst`;
+}
+
+// Helper to write JSON to S3 file
+export async function writeJsonToS3(s3: S3Client, key: string, data: any): Promise<void> {
+    await Bun.write(s3.file(key), JSON.stringify(data, null, 2));
+}
diff --git a/scripts/config.ts b/scripts/config.ts
new file mode 100644
index 0000000..1ea762e
--- /dev/null
+++ b/scripts/config.ts
@@ -0,0 +1,20 @@
+export const config = {
+    github: {
+        owner: 'amrkmn',
+        repo: 'x',
+        branch: 'main'
+    },
+    domains: [
+        'https://x.noz.one', //
+        'https://x.ujol.dev',
+        'https://x.amar.kim',
+        'https://x.ujol.workers.dev'
+    ],
+    filesToCopy: [
+        'index.json', //
+        'index.min.json',
+        'repo.json',
+        'apk',
+        'icon'
+    ]
+};
diff --git a/scripts/meilisearch.ts b/scripts/meilisearch.ts
new file mode 100644
index 0000000..a900ceb
--- /dev/null
+++ b/scripts/meilisearch.ts
@@ -0,0 +1,158 @@
+import { MeiliSearch } from 'meilisearch';
+import { readdir } from 'fs/promises';
+import { join } from 'path';
+
+interface Extension {
+    name: string;
+    pkg: string;
+    apk: string;
+    lang: string;
+    code: number;
+    version: string;
+    nsfw: number;
+}
+
+interface EnrichedExtension extends Extension {
+    id: string;
+    category: string;
+    sourceName: string;
+    formattedSourceName: string;
+    repoUrl: string;
+}
+
+interface SourceMapping {
+    name: string;
+    repoUrl: string;
+    category: string;
+}
+
+async function buildSourceMapping(path: string): Promise<Map<string, SourceMapping>> {
+    const mapping = new Map<string, SourceMapping>();
+    const data = await Bun.file(path).json();
+
+    for (const category in data.extensions) {
+        for (const repo of data.extensions[category]) {
+            const normalizedPath = repo.path.replace(/^\//, '');
+            mapping.set(normalizedPath, {
+                name: repo.name,
+                repoUrl: repo.path.substring(0, repo.path.lastIndexOf('/')),
+                category
+            });
+        }
+    }
+    return mapping;
+}
+
+async function findExtensionFiles(dir: string): Promise<string[]> {
+    let results: string[] = [];
+    try {
+        const entries = await readdir(dir, { withFileTypes: true });
+        for (const file of entries) {
+            const path = join(dir, file.name);
+            if (file.isDirectory()) results.push(...(await findExtensionFiles(path)));
+            else if (file.name === 'index.min.json') results.push(path);
+        }
+    } catch (e) {
+        console.error(`Error reading ${dir}:`, e);
+    }
+    return results;
+}
+
+export async function updateMeilisearch() {
+    const env = {
+        host: process.env.MEILISEARCH_HOST,
+        apiKey: process.env.MEILISEARCH_MASTER_KEY
+    };
+
+    if (!env.host || !env.apiKey) {
+        console.log('Skipping Meilisearch update (not configured)');
+        return;
+    }
+
+    console.log('Updating Meilisearch index...');
+    const STATIC_DIR = join(process.cwd(), 'static');
+
+    try {
+        const client = new MeiliSearch({ host: env.host, apiKey: env.apiKey });
+        await client.health();
+        const index = client.index('extensions');
+
+        await index.updateSettings({
+            searchableAttributes: ['name', 'pkg', 'lang', 'sourceName'],
+            filterableAttributes: [
+                'sourceName',
+                'formattedSourceName',
+                'category',
+                'lang',
+                'nsfw',
+                'pkg'
+            ],
+            sortableAttributes: ['name', 'lang', 'version'],
+            rankingRules: ['words', 'typo', 'proximity', 'attribute', 'sort', 'exactness'],
+            pagination: { maxTotalHits: 10000 }
+        });
+
+        const sourceMapping = await buildSourceMapping(join(STATIC_DIR, 'data.json'));
+        const files = await findExtensionFiles(STATIC_DIR);
+
+        if (!files.length) {
+            console.warn('No extension files found for Meilisearch');
+            return;
+        }
+
+        const allExtensions: EnrichedExtension[] = [];
+
+        for (const file of files) {
+            try {
+                const extensions: Extension[] = await Bun.file(file).json();
+                const relativePath = file
+                    .replace(STATIC_DIR, '')
+                    .replace(/\\/g, '/')
+                    .replace(/^\//, '');
+                const pathParts = relativePath.split('/').filter(Boolean);
+                const sourceInfo = sourceMapping.get(relativePath);
+
+                const sourceName = sourceInfo?.name || pathParts[0] || 'Unknown';
+                const repoUrl = sourceInfo?.repoUrl || '/' + pathParts.slice(0, -1).join('/');
+                const category =
+                    sourceInfo?.category ||
+                    (pathParts[0]?.toLowerCase().includes('anime') ? 'aniyomi' : 'mihon');
+                const formattedSourceName = sourceName.toLowerCase().replace(/\s+/g, '.');
+                const idSafeSourceName = formattedSourceName.replace(/\./g, '_');
+
+                allExtensions.push(
+                    ...extensions.map((ext) => ({
+                        ...ext,
+                        id: `${idSafeSourceName}-${ext.pkg.replace(/\./g, '_')}`,
+                        category,
+                        sourceName,
+                        formattedSourceName,
+                        repoUrl,
+                        nsfw: typeof ext.nsfw === 'number' ? ext.nsfw : ext.nsfw ? 1 : 0
+                    }))
+                );
+            } catch (err) {
+                console.error(`Error processing ${file}:`, err);
+            }
+        }
+
+        const task = await index.updateDocuments(allExtensions, { primaryKey: 'id' });
+        const result = await client.tasks.waitForTask(task.taskUid, {
+            timeout: 300000,
+            interval: 1000
+        });
+
+        if (result.status === 'succeeded') {
+            const stats = await index.getStats();
+            console.log(`Meilisearch updated: ${stats.numberOfDocuments} documents indexed`);
+        } else {
+            console.error('Meilisearch indexing failed:', result.error);
+        }
+    } catch (error) {
+        console.error('Meilisearch update error:', error);
+    }
+}
+
+if (import.meta.main) {
+    await updateMeilisearch();
+}
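Once indexed, the attributes declared filterable and sortable above can be used from any Meilisearch client. A query sketch against the same index (filter and sort values are illustrative; this assumes the standard meilisearch-js search() API):

    import { MeiliSearch } from 'meilisearch';

    const client = new MeiliSearch({
        host: process.env.MEILISEARCH_HOST!,
        apiKey: process.env.MEILISEARCH_MASTER_KEY!
    });
    // 'category' and 'nsfw' are filterable, 'name' is sortable per the settings above
    const results = await client.index('extensions').search('manga', {
        filter: 'category = "mihon" AND nsfw = 0',
        sort: ['name:asc']
    });
    console.log(`${results.hits.length} hits`);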
diff --git a/scripts/types.ts b/scripts/types.ts
new file mode 100644
index 0000000..da11cd4
--- /dev/null
+++ b/scripts/types.ts
@@ -0,0 +1,31 @@
+// Type definitions for config.json
+export interface ExtensionConfig {
+    source: string;
+    name: string;
+    path: string;
+    category: 'mihon' | 'aniyomi';
+    commit?: string;
+}
+
+export interface Config {
+    github: {
+        owner: string;
+        repo: string;
+        branch: string;
+    };
+    domains: string[];
+    directories: {
+        output: string;
+        extensions: string;
+    };
+    filesToCopy: string[];
+}
+
+export interface ExtensionSources {
+    [category: string]: Array<{
+        source: string;
+        name: string;
+        path: string;
+        commit?: string;
+    }>;
+}
diff --git a/scripts/update.ts b/scripts/update.ts
new file mode 100644
index 0000000..8649b41
--- /dev/null
+++ b/scripts/update.ts
@@ -0,0 +1,183 @@
+import { $ } from 'bun';
+import { existsSync } from 'fs';
+import { appendFile, cp } from 'fs/promises';
+import { join } from 'path';
+import { restoreCache, saveCache } from './cache';
+import { CACHE_PATHS, CACHE_RESTORE_KEYS, generateCacheKey } from './cache/utils';
+import { config } from './config';
+import { updateMeilisearch } from './meilisearch';
+import type { ExtensionConfig } from './types';
+
+const EXT_DIR = join(process.cwd(), 'static');
+const DATA_FILE = join(EXT_DIR, 'data.json');
+const TEMP_DIR = join(process.cwd(), 'tmp');
+
+const extensionsData: Record<string, Record<string, ExtensionConfig>> = await Bun.file(
+    'extensions.json'
+).json();
+
+const setOutput = async (key: string, value: string) =>
+    process.env.GITHUB_OUTPUT && (await appendFile(process.env.GITHUB_OUTPUT, `${key}=${value}\n`));
+
+async function generateData() {
+    console.log('Generating data.json...');
+    try {
+        const extensions = Object.fromEntries(
+            Object.entries(extensionsData).map(([category, exts]) => [
+                category,
+                Object.values(exts).map(({ source, name, path, commit }) => ({
+                    source,
+                    name,
+                    path,
+                    commit
+                }))
+            ])
+        );
+
+        const commit = (await $`git rev-parse HEAD`.text()).trim();
+        const { owner, repo } = config.github;
+        const source = `https://github.com/${owner}/${repo}`;
+
+        await Bun.write(
+            DATA_FILE,
+            JSON.stringify({
+                extensions,
+                domains: config.domains,
+                source,
+                commitLink: `${source}/commit/${commit}`,
+                latestCommitHash: commit.substring(0, 7)
+            })
+        );
+        console.log(`Generated data.json (${commit.substring(0, 7)})`);
+    } catch (error) {
+        console.error('Failed to generate data.json:', error);
+        process.exit(1);
+    }
+}
+
+if (process.argv.includes('--generate-only')) {
+    await generateData();
+    process.exit(0);
+}
+
+if (process.argv.includes('--update-search')) {
+    console.log('Updating search index only...');
+    await updateMeilisearch();
+    process.exit(0);
+}
+
+const quickMode = process.argv.includes('--quick');
+const useCache = !process.argv.includes('--no-cache') && !quickMode;
+
+if (useCache) await restoreCache(CACHE_PATHS, await generateCacheKey(), CACHE_RESTORE_KEYS);
+else
+    console.log(quickMode ? 'Cache disabled for quick mode' : 'Cache disabled via --no-cache flag');
+
+console.log('Checking for updates...');
+const synced = new Map<string, string>();
+if (!quickMode) {
+    try {
+        Object.values((await Bun.file(DATA_FILE).json()).extensions || {})
+            .flat()
+            .forEach((e: any) => e?.path && e?.commit && synced.set(e.path, e.commit));
+    } catch {}
+}
+
+const updates = (
+    await Promise.all(
+        Object.entries(extensionsData).flatMap(([category, group]) =>
+            Object.entries(group).map(async ([key, ext]) => {
+                try {
+                    const dest = join(EXT_DIR, key);
+                    const syncedHash = synced.get(ext.path);
+
+                    if (!quickMode && !existsSync(dest))
+                        return { category, key, ext, hash: ext.commit || 'HEAD' };
+
+                    const remoteHash = (
+                        await $`git ls-remote ${ext.source} HEAD | cut -f1`.text()
+                    ).trim();
+
+                    if (quickMode && remoteHash !== ext.commit) {
+                        console.log(
+                            `[${ext.name}] Update available: ${ext.commit?.slice(0, 7) ?? 'none'} -> ${remoteHash.slice(0, 7)}`
+                        );
+                        return { category, key, ext, hash: remoteHash };
+                    }
+
+                    if (!quickMode && (remoteHash !== syncedHash || ext.commit !== syncedHash)) {
+                        console.log(
+                            `[${ext.name}] Update: ${syncedHash?.slice(0, 7) ?? 'none'} -> ${remoteHash.slice(0, 7)}`
+                        );
+                        return { category, key, ext, hash: remoteHash };
+                    }
+                } catch {
+                    console.error(`Check failed: ${ext.name}`);
+                }
+                return null;
+            })
+        )
+    )
+).filter((u): u is NonNullable<typeof u> => u !== null);
+
+if (updates.length === 0) {
+    console.log('No updates found');
+    await setOutput('updated', 'false');
+    process.exit(0);
+}
+
+if (quickMode) {
+    console.log(`Found ${updates.length} updates. Updating extensions.json...`);
+    updates.forEach(({ category, key, hash }) => (extensionsData[category][key].commit = hash));
+    await Bun.write('extensions.json', JSON.stringify(extensionsData, null, 4));
+    await setOutput('updated', 'true');
+    process.exit(0);
+}
+
+const { CI, GITHUB_EVENT_NAME } = process.env;
+if (
+    CI === 'true' &&
+    GITHUB_EVENT_NAME &&
+    !['schedule', 'workflow_dispatch'].includes(GITHUB_EVENT_NAME)
+) {
+    console.log('Skipping updates (CI)');
+    await setOutput('updated', 'false');
+    process.exit(0);
+}
+
+console.log(`Updating ${updates.length} extensions...`);
+await $`rm -rf ${TEMP_DIR}`;
+
+let changed = false;
+for (const { key, ext, hash, category } of updates) {
+    console.log(`Processing ${ext.name}...`);
+    const temp = join(TEMP_DIR, key);
+    const dest = join(EXT_DIR, key);
+
+    try {
+        await $`git clone --depth 1 ${ext.source} ${temp}`.quiet();
+        await $`rm -rf ${dest} && mkdir -p ${dest}`;
+
+        for (const file of config.filesToCopy) {
+            const srcPath = join(temp, file);
+            if (existsSync(srcPath)) await cp(srcPath, join(dest, file), { recursive: true });
+        }
+
+        extensionsData[category][key].commit = hash;
+        changed = true;
+        console.log(`  Updated ${ext.name}`);
+    } catch (e) {
+        console.error(`  Update failed: ${ext.name}`, e);
+    }
+}
+
+await $`rm -rf ${TEMP_DIR}`;
+if (changed) {
+    await Bun.write('extensions.json', JSON.stringify(extensionsData, null, 4));
+    console.log('Updated extensions.json');
+    await generateData();
+    await updateMeilisearch();
+    if (useCache) await saveCache(CACHE_PATHS, await generateCacheKey());
+}
+
+await setOutput('updated', String(changed));
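Taken together, the flag handling above gives the script four entry modes besides the default full run; invocations would look something like this (assuming the script is run directly with Bun):

    bun scripts/update.ts                  # full run: check, sync, data.json, search index, cache
    bun scripts/update.ts --quick          # only bump commit hashes in extensions.json
    bun scripts/update.ts --no-cache       # full run without R2 cache restore/save
    bun scripts/update.ts --generate-only  # regenerate data.json, then exit
    bun scripts/update.ts --update-search  # rebuild the Meilisearch index, then exit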
diff --git a/scripts/worker.ts b/scripts/worker.ts
new file mode 100644
index 0000000..ae9db49
--- /dev/null
+++ b/scripts/worker.ts
@@ -0,0 +1,6 @@
+export default {
+    async fetch(request: Request, env: any) {
+        // Serve static assets
+        return env.ASSETS.fetch(request);
+    }
+};
--
cgit