From e284496d66d7a1dec5634585caa2a3d61dcf9e2b Mon Sep 17 00:00:00 2001
From: Oskar Risberg
Date: Tue, 2 Jun 2026 22:07:58 +0200
Subject: [PATCH] Fetch parent Reddit threads via OAuth API

Reddit now blocks unauthenticated .json endpoints with a 403. The
parent-post lookup for reply (t1) posts now hits
oauth.reddit.com/comments/{id} with a bearer token from a new
modules/reddit-auth.js, refreshing once on a 401.

Token responses are validated before they are cached, so a 200 reply
that carries an error body or lacks access_token/expires_in rejects
instead of handing out a "bearer undefined" header.
---
Review note: line breaks and indentation were restored and the token
validation was added during review; the blob ids on the index lines
below predate that amendment.

 indexers/reddit.js     |  43 ++++++++++--
 modules/reddit-auth.js | 147 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 183 insertions(+), 7 deletions(-)
 create mode 100644 modules/reddit-auth.js

diff --git a/indexers/reddit.js b/indexers/reddit.js
index a89fa20..e840239 100644
--- a/indexers/reddit.js
+++ b/indexers/reddit.js
@@ -5,11 +5,12 @@ const {
 } = require( 'html-entities' );
 
 const Post = require( '../modules/Post.js' );
+const redditAuth = require( '../modules/reddit-auth.js' );
 
 const xmlEntities = new XmlEntities();
 const htmlEntities = new AllHtmlEntities();
 
-const USER_AGENT = 'Peon 1.0.0 by /u/Kokarn';
+const UNAUTHORIZED_STATUS_CODE = 401;
 
 const IMAGE_DOMAINS = [
     'i.redd.it',
@@ -20,8 +21,10 @@ const IMAGE_DOMAINS = [
 
 class Reddit {
     constructor () {
-        this.apiBase = 'https://www.reddit.com';
-        this.singleCommentUrl = '/comments/{topicID}.json?limit=1000';
+        // The unauthenticated .json endpoints are blocked, so we talk to the
+        // OAuth API host with a bearer token (see modules/reddit-auth).
+        this.apiBase = 'https://oauth.reddit.com';
+        this.singleCommentUrl = '/comments/{topicID}?limit=1000';
         this.requestCount = 0;
     }
 
@@ -38,17 +41,43 @@ class Reddit {
         return this.apiBase + this.singleCommentUrl.replace( '{topicID}', this.parseId( topicID ) );
     }
 
-    async getTopic ( topicID ) {
-        this.requestCount = this.requestCount + 1;
+    // GETs a URL from the OAuth API using the current bearer token.
+    async authedGet ( url ) {
+        const token = await redditAuth.getToken();
 
-        return await got( this.getTopicLink( topicID ), {
+        return got( url, {
             headers: {
-                'user-agent': USER_AGENT,
+                authorization: `bearer ${ token }`,
+                'user-agent': redditAuth.userAgent(),
             },
             json: true,
         } );
     }
 
+    // Loads the comment listing for a topic, re-authenticating once when the
+    // first attempt comes back 401.
+    async getTopic ( topicID ) {
+        this.requestCount = this.requestCount + 1;
+
+        const url = this.getTopicLink( topicID );
+
+        try {
+            return await this.authedGet( url );
+        } catch ( requestError ) {
+            const statusCode = requestError.statusCode || ( requestError.response && requestError.response.statusCode );
+
+            // A 401 means the token was rejected (expired or revoked early).
+            // Drop it and retry once with a fresh one before giving up.
+            if ( statusCode === UNAUTHORIZED_STATUS_CODE ) {
+                redditAuth.invalidateToken();
+
+                return await this.authedGet( url );
+            }
+
+            throw requestError;
+        }
+    }
+
     findComment ( listing, commentID ) {
         if ( !listing ) {
             console.log( 'Got invalid listing data' );
diff --git a/modules/reddit-auth.js b/modules/reddit-auth.js
new file mode 100644
index 0000000..49f64e3
--- /dev/null
+++ b/modules/reddit-auth.js
@@ -0,0 +1,147 @@
+const https = require( 'https' );
+const querystring = require( 'querystring' );
+
+const TOKEN_HOSTNAME = 'www.reddit.com';
+const TOKEN_PATH = '/api/v1/access_token';
+const OK_STATUS_CODE = 200;
+
+// Refresh a little before the token actually expires so an in-flight request
+// never races the expiry.
+const EXPIRY_SKEW_MS = 60000;
+const SECONDS_TO_MS = 1000;
+
+let cachedToken = null;
+let tokenExpiresAt = 0;
+
+// Reddit asks for a unique, descriptive user-agent. Honour an explicit override,
+// otherwise build one that names the bot and (when known) the owning account.
+const userAgent = function userAgent () {
+    if ( process.env.REDDIT_USER_AGENT ) {
+        return process.env.REDDIT_USER_AGENT;
+    }
+
+    const account = process.env.REDDIT_USERNAME ? ` (by /u/${ process.env.REDDIT_USERNAME })` : '';
+
+    return `post-tracker peon${ account }`;
+};
+
+// Builds the HTTP Basic credentials for the token endpoint from the app's
+// client id and secret. Throws when either environment variable is missing.
+const basicAuthHeader = function basicAuthHeader () {
+    const clientId = process.env.REDDIT_CLIENT_ID;
+    const clientSecret = process.env.REDDIT_CLIENT_SECRET;
+
+    if ( !clientId || !clientSecret ) {
+        throw new Error( 'Reddit OAuth requires REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET' );
+    }
+
+    const encoded = Buffer.from( `${ clientId }:${ clientSecret }` ).toString( 'base64' );
+
+    return `Basic ${ encoded }`;
+};
+
+// Script apps authenticate as a user (password grant); web/confidential apps use
+// application-only auth (client_credentials). Pick based on which env vars are set.
+const grantBody = function grantBody () {
+    if ( process.env.REDDIT_USERNAME && process.env.REDDIT_PASSWORD ) {
+        return querystring.stringify( {
+            grant_type: 'password',
+            password: process.env.REDDIT_PASSWORD,
+            username: process.env.REDDIT_USERNAME,
+        } );
+    }
+
+    return querystring.stringify( {
+        grant_type: 'client_credentials',
+    } );
+};
+
+// Validates a decoded token response and caches it. The token endpoint may
+// answer 200 with an { error } body (e.g. a rejected grant), so a missing
+// access_token or a non-numeric expires_in must fail loudly instead of
+// caching `undefined` or a NaN expiry.
+const storeToken = function storeToken ( parsed ) {
+    if ( !parsed || !parsed.access_token || !Number.isFinite( parsed.expires_in ) ) {
+        const reason = ( parsed && parsed.error ) || 'missing access_token or expires_in';
+
+        throw new Error( `Reddit token response was unusable: ${ reason }` );
+    }
+
+    cachedToken = parsed.access_token;
+    tokenExpiresAt = Date.now() + ( parsed.expires_in * SECONDS_TO_MS );
+
+    return cachedToken;
+};
+
+// Requests a new token from Reddit and caches it. Rejects on a network
+// error, a non-200 status, or a response without a usable token.
+const fetchToken = function fetchToken () {
+    return new Promise( ( resolve, reject ) => {
+        const payload = grantBody();
+        const options = {
+            headers: {
+                authorization: basicAuthHeader(),
+                'content-length': Buffer.byteLength( payload ),
+                'content-type': 'application/x-www-form-urlencoded',
+                'user-agent': userAgent(),
+            },
+            hostname: TOKEN_HOSTNAME,
+            method: 'POST',
+            path: TOKEN_PATH,
+        };
+
+        const request = https.request( options, ( response ) => {
+            let body = '';
+
+            response.setEncoding( 'utf8' );
+            response.on( 'data', ( chunk ) => {
+                body = `${ body }${ chunk }`;
+            } );
+
+            response.on( 'end', () => {
+                if ( response.statusCode !== OK_STATUS_CODE ) {
+                    reject( new Error( `Reddit token request failed with status ${ response.statusCode }` ) );
+
+                    return;
+                }
+
+                // JSON.parse and storeToken both throw on bad input; turn
+                // that into a rejection rather than an uncaught exception.
+                try {
+                    resolve( storeToken( JSON.parse( body ) ) );
+                } catch ( tokenError ) {
+                    reject( tokenError );
+                }
+            } );
+        } );
+
+        request.on( 'error', ( error ) => {
+            reject( error );
+        } );
+
+        request.write( payload );
+        request.end();
+    } );
+};
+
+// Returns a valid bearer token, fetching a fresh one when missing or near expiry.
+const getToken = function getToken () {
+    if ( cachedToken && Date.now() < tokenExpiresAt - EXPIRY_SKEW_MS ) {
+        return Promise.resolve( cachedToken );
+    }
+
+    return fetchToken();
+};
+
+// Drops the cached token so the next getToken() call re-authenticates. Used to
+// recover from a 401 (e.g. a token revoked before its stated expiry).
+const invalidateToken = function invalidateToken () {
+    cachedToken = null;
+    tokenExpiresAt = 0;
+};
+
+module.exports = {
+    getToken,
+    invalidateToken,
+    userAgent,
+};