Skip to content

Commit

Permalink
Incremental indexing support for api
Browse files Browse the repository at this point in the history
  • Loading branch information
sivakumar-kailasam committed Oct 19, 2018
1 parent bf1d041 commit 72f69bc
Show file tree
Hide file tree
Showing 11 changed files with 416 additions and 108 deletions.
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ API_DOCS_PATH=../ember-jsonapi-docs/tmp
GUIDES_DOCS_PATH=../guides-app/dist/content
DEBUG=false
DRIVER=algolia
AWS_ACCESS_KEY=""
AWS_SECRET_KEY=""
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ node_modules
build
drivers-output
dist
yarn-error.log
tmp
.vscode
3 changes: 2 additions & 1 deletion .prettierrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"singleQuote": true,
"semi": false
"semi": false,
"trailingComma": "es5"
}
6 changes: 5 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ program
.version(version, '-v, --version')
.description(description)
.option('-p, --project <project>', 'Project name. Accepts "api" or "guides"')
.option(
'-c, --clear-index',
'Whether indexes of the project should be cleared while processing'
)

program.on('--help', function() {
console.log(`
Expand All @@ -25,7 +29,7 @@ switch (program.project) {
runGuides()
break
case 'api':
runApi()
runApi(program.clearIndex)
break
default:
throw new Error('Invalid --project property')
Expand Down
58 changes: 58 additions & 0 deletions lib/api-docs-sync.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { Promise, all as waitForAllPromises } from 'bluebird'
import S3 from 's3'
import ora from 'ora'
import humanSize from 'human-size'
import http from 'http'
import https from 'https'

// To increase s3's download & upload dir perf: raise the socket cap on both
// global agents.
http.globalAgent.maxSockets = 30
https.globalAgent.maxSockets = 30

// AWS credentials are taken from the process environment.
const { AWS_ACCESS_KEY, AWS_SECRET_KEY } = process.env

const client = S3.createClient({
  s3Options: { accessKeyId: AWS_ACCESS_KEY, secretAccessKey: AWS_SECRET_KEY },
})

// Both download targets live in the same bucket, under different prefixes.
const DOCS_BUCKET = 'api-docs.emberjs.com'

// Mirrors the `json-docs` prefix into tmp/json-docs.
const jsonDocsDirDownloadOptions = {
  localDir: 'tmp/json-docs',
  s3Params: { Bucket: DOCS_BUCKET, Prefix: 'json-docs' },
}

// Mirrors the `rev-index` prefix into tmp/rev-index.
const revDocsDirDownloadOptions = {
  localDir: 'tmp/rev-index',
  s3Params: { Bucket: DOCS_BUCKET, Prefix: 'rev-index' },
}

/**
 * Downloads one S3 prefix into a local directory while showing a spinner
 * that reports progress.
 *
 * @param {Object} options - Handed unchanged to `client.downloadDir`. Only
 *   `options.s3Params.Prefix` is read here, to label the status messages.
 * @returns {Promise} Resolves with no value when the downloader emits `end`;
 *   rejects with the emitted error when it emits `error`.
 */
const syncDir = options => {
  const label = `${options.s3Params.Prefix} docs`

  return new Promise((resolve, reject) => {
    const sync = client.downloadDir(options)
    const progressIndicator = ora(`downloading ${label}`).start()

    sync.on('progress', () => {
      const { progressAmount, progressTotal } = sync
      // Use the same label as the start/succeed messages so every download
      // (not only json-docs) reports under its own prefix.
      progressIndicator.text = `downloading ${label} (${humanSize(
        progressAmount
      )} of ${humanSize(progressTotal)})`
    })

    sync.on('end', () => {
      progressIndicator.succeed(`downloaded ${label}`)
      resolve()
    })

    sync.on('error', err => {
      progressIndicator.fail()
      reject(err)
    })
  })
}

/**
 * Starts the json-docs and rev-index directory downloads together.
 *
 * @returns {Promise} The result of `waitForAllPromises` over the two
 *   `syncDir` promises (json-docs first, rev-index second).
 */
export default function downloadExistingDocsToLocal() {
  const pendingDownloads = [
    jsonDocsDirDownloadOptions,
    revDocsDirDownloadOptions,
  ].map(downloadOptions => syncDir(downloadOptions))

  return waitForAllPromises(pendingDownloads)
}
145 changes: 80 additions & 65 deletions lib/api.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
require('dotenv').config()

import Bluebird from 'bluebird'
import { all, resolve } from 'bluebird'
import { compare as compareSemVers } from 'semver'
import { difference } from 'lodash'

import logger from './utils/logger'
import drivers from './drivers'
import { readTmpFileFactory, readTmpFileAsyncFactory } from './utils/fs'
import schemas from './schemas'
import downloadApiDocs from './api-docs-sync'

// Get 'readTmpFile' and 'readTmpFileAsync' bound by 'api'
const PROJECT_TYPE = 'api'
Expand All @@ -15,46 +19,68 @@ const { DRIVER } = process.env

const SelectedDriver = drivers[DRIVER]

export function run() {
// Initialise drivers
SelectedDriver.init('modules')
SelectedDriver.init('classes')
SelectedDriver.init('methods')

// Load ember.json which includes all available ember versions.
return (
readTmpFileAsync('rev-index/ember.json')
// Extract available versions
.then(emberJson => emberJson.meta.availableVersions)
// Clear the driver contents
.tap(clearDriver)
// Grab the json file of each ember version
.map(readEmberIndexFileForVersion)
const apiIndexes = ['modules', 'classes', 'methods', 'versions']

/**
 * Entry point for indexing the API docs.
 *
 * Initialises every index on the selected driver, optionally clears them,
 * downloads the API docs, then processes the `ember` and `ember-data`
 * projects together.
 *
 * @param {boolean} [clearIndex=false] - When truthy, every index is cleared
 *   before any docs are processed.
 * @returns {Promise<void>}
 */
export async function run(clearIndex = false) {
  // Call through the driver object with just the index name, so the driver
  // keeps its `this` and is not handed map's extra (index, array) arguments.
  for (const indexName of apiIndexes) {
    SelectedDriver.init(indexName)
  }

  if (clearIndex) {
    await all(apiIndexes.map(indexName => SelectedDriver.clear(indexName)))
  }

  await downloadApiDocs()

  await all([processDocs('ember'), processDocs('ember-data')])
}

/**
 * Indexes the versions of `project` that the driver has not indexed yet,
 * then records the full sorted version list in the `versions` index.
 *
 * Errors raised while reading, mapping or writing are logged, not rethrown.
 *
 * @param {string} project - Project name; also the base name of the
 *   `rev-index/<project>.json` file that lists the available versions.
 * @returns {Promise<void>}
 */
async function processDocs(project) {
  const prevIndexedVersions = await SelectedDriver.getPreviouslyIndexedVersions(
    project
  )

  const {
    meta: { availableVersions },
  } = await readTmpFileAsync(`rev-index/${project}.json`)

  const versionsToProcess = difference(availableVersions, prevIndexedVersions)

  if (versionsToProcess.length === 0) {
    console.log(`No new versions to process for ${project}`)
    return
  }

  try {
    console.log(`Processing ${project} for versions: ${versionsToProcess}`)

    // Awaiting a bare array does not wait for the promises inside it. Gather
    // the per-version writes with `all` so the versions record below is only
    // written once every version is stored, and so a failed write lands in
    // the catch block.
    await all(
      versionsToProcess.map(async version => {
        const versionIndexObject = await readIndexFileForVersion(
          version,
          project
        )
        // Fetch all public modules and public classes
        const versionObject = await fetchPublicModuleClassesForVersion(
          versionIndexObject,
          project
        )
        // Run the schema against all data stored
        const mappedVersionObject = await mapDataForVersion(versionObject)
        // Write out to selected driver.
        return writeToDriver(mappedVersionObject)
      })
    )

    await SelectedDriver.write(
      'versions',
      [
        {
          id: project,
          name: project,
          versions: [...prevIndexedVersions, ...versionsToProcess].sort(
            compareSemVers
          ),
        },
      ],
      project
    )
  } catch (err) {
    console.log('Error:: ', err)
  }
}

function readEmberDataIndexFileForVersion(version) {
return readIndexFileForVersion(version, 'ember-data')
}
/**
* Read index file for version
*
Expand All @@ -68,14 +94,6 @@ function readIndexFileForVersion(version, libName) {
return readTmpFile(emberVersionJSONPath)
}

function fetchPublicModuleClassesForEmberVersion(versionIndexObject) {
return fetchPublicModuleClassesForVersion(versionIndexObject, 'ember')
}

function fetchPublicModuleClassesForEmberDataVersion(versionIndexObject) {
return fetchPublicModuleClassesForVersion(versionIndexObject, 'ember-data')
}

/**
* Fetch public modules and classes for version
*
Expand Down Expand Up @@ -143,40 +161,37 @@ function mapDataForVersion(versionObject) {
* @param versionObject - Object version to write out
*/
function writeToDriver(versionObject) {
  const { id } = versionObject.version.data

  // The id is expected to be `<project>-<version>`. Split at the first hyphen
  // that is followed by a digit, so a hyphenated pre-release suffix (e.g.
  // `-beta.1`) stays part of the version instead of being taken for it.
  const idParts = /^(.+?)-(\d.*)$/.exec(id)
  let projectName
  let version
  if (idParts) {
    projectName = idParts[1]
    version = idParts[2]
  } else {
    // No digit-led segment: fall back to treating the last segment as version.
    const tokens = id.split('-')
    version = tokens.pop()
    projectName = tokens.join('-')
  }

  logger.logGreen(
    `version: ${id}, public classes: ${
      versionObject.publicClasses.length
    }, public modules: ${versionObject.publicModules.length}, methods: ${
      versionObject.methods.length
    }`
  )

  // Wait for all promises to complete before continuing
  return all([
    SelectedDriver.write(
      'modules',
      versionObject.publicModules,
      projectName,
      version
    ),
    SelectedDriver.write(
      'classes',
      versionObject.publicClasses,
      projectName,
      version
    ),
    SelectedDriver.write('methods', versionObject.methods, projectName, version),
  ])
}

/**
* Clears the driver indices
*
* @returns {Promise} - Promise with all drivers cleared.
*/
function clearDriver() {
return Bluebird.all([
SelectedDriver.clear('modules'),
SelectedDriver.clear('classes'),
SelectedDriver.clear('methods')
])
}

// Handle errors
function errorHandler(err) {
console.log('Error:: ', err)
}

/**
* Takes an array of classes, extracts the methods from each one,
* and runs the method schema to transform the payload
Expand Down
19 changes: 12 additions & 7 deletions lib/drivers/algolia.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ let client = null

const indices = {}

export function init(indexName) {
function init(indexName) {
if (!client) client = AlgoliaSearch(ALGOLIA_APP_ID, ALGOLIA_ADMIN_KEY)
// Create an algolia index
const index = client.initIndex(indexName)
Expand All @@ -19,7 +19,7 @@ export function init(indexName) {
return index
}

export function write(indexName, records) {
function write(indexName, records) {
const index = indices[indexName]

return Bluebird.resolve(index.addObjects(records))
Expand All @@ -36,13 +36,18 @@ export function write(indexName, records) {
})
}

export function clear(indexName) {
function clear(indexName) {
const index = indices[indexName]
return Bluebird.resolve(index.clearIndex())
}

export default {
init,
write,
clear
/**
 * Looks up which versions of a project are already recorded in the
 * `versions` index.
 *
 * @param {string} projectName - Name to search for and to match against each
 *   hit's `name`.
 * @returns {Promise<Array>} The `versions` stored on the hit whose `name`
 *   equals `projectName`, or an empty array when there is no such hit.
 */
async function getPreviouslyIndexedVersions(projectName) {
  const { hits } = await indices['versions'].search(projectName)

  // An empty `hits` array is truthy and `find` returns undefined on a miss,
  // so guard on the matched record itself rather than on `hits`.
  const projectRecord = (hits || []).find(hit => hit.name === projectName)
  if (!projectRecord) {
    return []
  }

  return projectRecord.versions || []
}

export default { init, write, clear, getPreviouslyIndexedVersions }
46 changes: 34 additions & 12 deletions lib/drivers/json.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,40 @@
import Bluebird from 'bluebird'
import fs from 'file-system'
import { resolve } from 'bluebird'
import {
emptyDirSync,
existsSync,
outputJsonSync,
readJsonSync,
rmdirSync,
} from 'fs-extra'

export function init(fileName) {}
const outputFolder = './drivers-output'

export function write(fileName, content) {
return fs.writeFileSync(
`./drivers-output/json/${fileName}.json`,
JSON.stringify(content),
'utf8'
)
function init() {}

/**
 * Writes `content` as pretty-printed JSON under the driver's output folder.
 *
 * The file goes to `<outputFolder>/json/<projectName>/<version>/<indexName>.json`,
 * or directly under the project folder when no version is given.
 *
 * @param {string} indexName - Base name of the JSON file.
 * @param {*} content - Data handed to `outputJsonSync`.
 * @param {string} projectName - Project sub-folder.
 * @param {string} [version] - Optional version sub-folder.
 * @returns {*} Whatever `outputJsonSync` returns.
 */
function write(indexName, content, projectName, version) {
  const projectDir = `${outputFolder}/json/${projectName}`
  const targetDir = version ? `${projectDir}/${version}` : projectDir

  return outputJsonSync(`${targetDir}/${indexName}.json`, content, {
    spaces: 2,
  })
}

/**
 * Removes the driver's output folder together with its contents.
 *
 * @returns {Promise} A promise from `resolve`: empty when the folder does not
 *   exist, otherwise wrapping the result of `rmdirSync`.
 */
function clear() {
  const hasOutput = existsSync(outputFolder)

  if (hasOutput) {
    // Empty the folder first, then remove the folder itself.
    emptyDirSync(outputFolder)
    const removed = rmdirSync(outputFolder)
    return resolve(removed)
  }

  return resolve()
}

export function clear() {
return Bluebird.resolve()
/**
 * Reads the versions already written for a project from its versions.json.
 *
 * @param {string} projectName - Project sub-folder under `<outputFolder>/json`.
 * @returns {Array} The `versions` field of the first record in the file, or
 *   an empty array when the file does not exist.
 */
function getPreviouslyIndexedVersions(projectName) {
  const versionsFile = `${outputFolder}/json/${projectName}/versions.json`

  if (existsSync(versionsFile)) {
    const [firstRecord] = readJsonSync(versionsFile)
    return firstRecord.versions
  }

  return []
}

export default { init, write, clear }
export default { init, write, clear, getPreviouslyIndexedVersions }
Loading

0 comments on commit 72f69bc

Please sign in to comment.