Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions indexers/reddit.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ const {
} = require( 'html-entities' );

const Post = require( '../modules/Post.js' );
const redditAuth = require( '../modules/reddit-auth.js' );

const xmlEntities = new XmlEntities();
const htmlEntities = new AllHtmlEntities();

const USER_AGENT = 'Peon 1.0.0 by /u/Kokarn';
const UNAUTHORIZED_STATUS_CODE = 401;

const IMAGE_DOMAINS = [
'i.redd.it',
Expand All @@ -20,8 +21,10 @@ const IMAGE_DOMAINS = [

class Reddit {
constructor () {
this.apiBase = 'https://www.reddit.com';
this.singleCommentUrl = '/comments/{topicID}.json?limit=1000';
// The unauthenticated .json endpoints are blocked, so we talk to the
// OAuth API host with a bearer token (see modules/reddit-auth).
this.apiBase = 'https://oauth.reddit.com';
this.singleCommentUrl = '/comments/{topicID}?limit=1000';

this.requestCount = 0;
}
Expand All @@ -38,17 +41,40 @@ class Reddit {
return this.apiBase + this.singleCommentUrl.replace( '{topicID}', this.parseId( topicID ) );
}

async getTopic ( topicID ) {
this.requestCount = this.requestCount + 1;
async authedGet ( url ) {
const token = await redditAuth.getToken();

return await got( this.getTopicLink( topicID ), {
return got( url, {
headers: {
'user-agent': USER_AGENT,
authorization: `bearer ${ token }`,
'user-agent': redditAuth.userAgent(),
},
json: true,
} );
}

async getTopic ( topicID ) {
this.requestCount = this.requestCount + 1;

const url = this.getTopicLink( topicID );

try {
return await this.authedGet( url );
} catch ( requestError ) {
const statusCode = requestError.statusCode || ( requestError.response && requestError.response.statusCode );

// A 401 means the token was rejected (expired or revoked early).
// Drop it and retry once with a fresh one before giving up.
if ( statusCode === UNAUTHORIZED_STATUS_CODE ) {
redditAuth.invalidateToken();

return await this.authedGet( url );
}

throw requestError;
}
}

findComment ( listing, commentID ) {
if ( !listing ) {
console.log( 'Got invalid listing data' );
Expand Down
135 changes: 135 additions & 0 deletions modules/reddit-auth.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
const https = require( 'https' );
const querystring = require( 'querystring' );

// Endpoint the access token is requested from, and the only status code that
// counts as a successful token response.
const TOKEN_HOSTNAME = 'www.reddit.com';
const TOKEN_PATH = '/api/v1/access_token';
const OK_STATUS_CODE = 200;

const SECONDS_TO_MS = 1000;

// A cached token is treated as stale this long before its stated expiry, so a
// request that is already in flight never races the expiry.
const EXPIRY_SKEW_MS = 60 * SECONDS_TO_MS;

// Module-level token cache: the expiry as an epoch-millisecond timestamp
// (0 means "nothing cached") and the token itself.
let tokenExpiresAt = 0;
let cachedToken = null;

// Builds the user-agent string sent with every Reddit request.
// REDDIT_USER_AGENT, when set, is returned verbatim. Otherwise the string names
// the bot and appends the owning account if REDDIT_USERNAME is set.
const userAgent = function userAgent () {
    const {
        REDDIT_USER_AGENT: override,
        REDDIT_USERNAME: owner,
    } = process.env;

    if ( override ) {
        return override;
    }

    if ( !owner ) {
        return 'post-tracker peon';
    }

    return `post-tracker peon (by /u/${ owner })`;
};

// Produces the HTTP Basic `authorization` header value for the token request
// from REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET.
// Throws when either variable is missing or empty.
const basicAuthHeader = function basicAuthHeader () {
    const {
        REDDIT_CLIENT_ID: id,
        REDDIT_CLIENT_SECRET: secret,
    } = process.env;
    const hasCredentials = Boolean( id && secret );

    if ( !hasCredentials ) {
        throw new Error( 'Reddit OAuth requires REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET' );
    }

    const credentials = Buffer.from( `${ id }:${ secret }` );

    return `Basic ${ credentials.toString( 'base64' ) }`;
};

// Builds the form-encoded body of the token request.
// With both REDDIT_USERNAME and REDDIT_PASSWORD set, the password grant is used
// (script apps authenticating as a user); otherwise it falls back to the
// application-only client_credentials grant.
const grantBody = function grantBody () {
    const {
        REDDIT_PASSWORD: password,
        REDDIT_USERNAME: username,
    } = process.env;

    const grant = username && password ?
        {
            grant_type: 'password',
            password,
            username,
        } :
        {
            grant_type: 'client_credentials',
        };

    return querystring.stringify( grant );
};

// Requests a new access token from Reddit's token endpoint and stores it, with
// its expiry time, in the module-level cache.
//
// Resolves with the access token string. Rejects when:
//   - the credentials are not configured (basicAuthHeader() throws),
//   - the request or the response stream errors,
//   - the status code is not 200,
//   - the body is not valid JSON,
//   - the body carries an `error` field or no usable `access_token`.
// The cache is only written on success, so a failed attempt never leaves a
// bogus token behind.
const fetchToken = function fetchToken () {
    return new Promise( ( resolve, reject ) => {
        const payload = grantBody();
        const options = {
            headers: {
                authorization: basicAuthHeader(),
                'content-length': Buffer.byteLength( payload ),
                'content-type': 'application/x-www-form-urlencoded',
                'user-agent': userAgent(),
            },
            hostname: TOKEN_HOSTNAME,
            method: 'POST',
            path: TOKEN_PATH,
        };

        const request = https.request( options, ( response ) => {
            let body = '';

            response.setEncoding( 'utf8' );
            response.on( 'data', ( chunk ) => {
                body = `${ body }${ chunk }`;
            } );

            // A response stream that dies mid-body would otherwise surface as an
            // unhandled 'error' event instead of a rejected promise.
            response.on( 'error', reject );

            response.on( 'end', () => {
                if ( response.statusCode !== OK_STATUS_CODE ) {
                    reject( new Error( `Reddit token request failed with status ${ response.statusCode }` ) );

                    return;
                }

                let parsed;

                try {
                    parsed = JSON.parse( body );
                } catch ( parseError ) {
                    reject( parseError );

                    return;
                }

                // The endpoint can answer 200 with an `error` field (for example
                // `invalid_grant`) instead of a token. Treat that, and any body
                // without a token, as a failure rather than caching `undefined`.
                const hasToken = Boolean( parsed ) && typeof parsed.access_token === 'string' && parsed.access_token.length > 0;

                if ( !hasToken || parsed.error ) {
                    const reason = parsed && parsed.error ? parsed.error : 'no access_token in response';

                    reject( new Error( `Reddit token request was rejected: ${ reason }` ) );

                    return;
                }

                // Without a usable expires_in the token is handed out once but
                // never considered cached, so the next call fetches again.
                const lifetimeSeconds = Number( parsed.expires_in );
                const lifetimeMs = Number.isFinite( lifetimeSeconds ) ? lifetimeSeconds * SECONDS_TO_MS : 0;

                cachedToken = parsed.access_token;
                tokenExpiresAt = Date.now() + lifetimeMs;

                resolve( cachedToken );
            } );
        } );

        request.on( 'error', reject );

        request.write( payload );
        request.end();
    } );
};

// Resolves with a bearer token. The cached token is reused while one exists and
// the current time is more than EXPIRY_SKEW_MS short of its expiry; in every
// other case a new token is requested via fetchToken().
const getToken = function getToken () {
    const refreshAt = tokenExpiresAt - EXPIRY_SKEW_MS;
    const isUsable = Boolean( cachedToken ) && Date.now() < refreshAt;

    return isUsable ? Promise.resolve( cachedToken ) : fetchToken();
};

// Clears the token cache so the following getToken() call has to authenticate
// again. Meant for recovering from a 401, e.g. when a token was revoked before
// its stated expiry.
const invalidateToken = function invalidateToken () {
    tokenExpiresAt = 0;
    cachedToken = null;
};

// Public API of this module: token retrieval, cache invalidation, and the
// user-agent string builder.
module.exports = {
getToken,
invalidateToken,
userAgent,
};
Loading