awesome-docker/buildMetadata.js

168 lines
4.7 KiB
Markdown
Raw Normal View History

2018-07-19 05:59:07 -04:00
const fs = require('fs-extra');
2018-06-07 03:06:04 -04:00
const fetch = require('node-fetch');
const dayjs = require('dayjs');
require('draftlog').into(console);
2018-07-20 03:43:55 -04:00
/**
 * Minimal console logger.
 * Both methods pack their arguments into a plain object keyed by argument
 * position (`{ 0: first, 1: second, ... }`) and print it after a fixed prefix.
 */
const LOG = {
  /** Always prints to stderr via console.error. */
  error(...args) {
    return console.error('❌ ERROR', Object.assign({}, args));
  },
  /** Prints to stdout via console.log, but only when the DEBUG env var is set. */
  debug(...args) {
    if (!process.env.DEBUG) return;
    console.log('💡 DEBUG: ', Object.assign({}, args));
  },
};
2018-07-21 09:12:59 -04:00
/**
 * Fatal-error handler: reports the error through LOG.error, then terminates
 * the process with exit status 1.
 * @param {*} err - the error (or rejection reason) to report.
 */
const handleFailure = function handleFailure(err) {
  LOG.error(err);
  process.exit(1);
};
2018-07-20 03:43:55 -04:00
2018-07-21 09:12:59 -04:00
// Treat any unhandled promise rejection as fatal: handleFailure logs it and
// exits with status 1.
process.on('unhandledRejection', handleFailure);

// Every request carries an Authorization header built from this token, so
// refuse to start when it is missing.
if (!process.env.GITHUB_TOKEN) {
  LOG.error('no credentials found.');
  process.exit(1);
}
const TOKEN = process.env.GITHUB_TOKEN;
2018-06-07 03:06:04 -04:00
// --- ENV VAR ---
/**
 * Reads an integer setting from the environment.
 * @param {string} name - environment variable name.
 * @param {number} fallback - value used when the variable is unset,
 *   non-numeric, or smaller than `min`.
 * @param {number} min - smallest accepted value.
 * @returns {number} the parsed integer, or `fallback`.
 */
const readIntEnv = (name, fallback, min) => {
  const parsed = parseInt(process.env[name], 10);
  return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
};
// Must be at least 1: it is the step of the batching loop, so zero or a
// negative value would keep that loop from ever finishing.
const BATCH_SIZE = readIntEnv('BATCH_SIZE', 10, 1);
// Milliseconds to wait between batches; 0 is accepted and disables the pause.
const DELAY = readIntEnv('DELAY', 3000, 0);
// Minimum age of the previous snapshot, in INTERVAL_UNIT, before refetching;
// 0 means "always refetch".
const INTERVAL = readIntEnv('INTERVAL', 1, 0);
const INTERVAL_UNIT = process.env.INTERVAL_UNIT || 'days';
2018-07-20 03:43:55 -04:00
2018-07-21 09:12:59 -04:00
// --- FILES ---
const DATA_FOLDER = 'data';
const README = 'README.md';
// Timestamp of this run, embedded in the snapshot file name; shouldUpdate()
// later reads the date and hour back out of that name.
const DATE = dayjs().format('YYYY-MM-DDTHH.mm.ss');
const GITHUB_METADATA_FILE = `${DATA_FOLDER}/${DATE}-fetched_repo_data.json`;
// main() writes the path of the newest snapshot file into this file.
const LATEST_FILENAME = `${DATA_FOLDER}/latest`;
const GITHUB_REPOS = `${DATA_FOLDER}/repository.json`;
// Value of the Authorization header placed in the shared request options.
const Authorization = `token ${TOKEN}`;
2018-07-20 03:43:55 -04:00
2018-06-07 03:06:04 -04:00
// --- HTTP ---
const API = 'https://api.github.com/';
// Default request options shared by every API call; get() spreads any
// per-call overrides on top of these.
const options = {
  method: 'GET',
  headers: {
    'User-Agent': 'awesome-docker script listing',
    'Content-Type': 'application/json',
    Authorization,
  },
};
2018-07-21 09:12:59 -04:00
// ----------------------------------------------------------------------------
2018-06-07 03:06:04 -04:00
/**
 * Drops the first `'https://github.com/'.length` characters of `x`.
 * The prefix is not verified; the function only cuts by length.
 * @param {string} x - a repository URL.
 * @returns {string} the remainder, e.g. `owner/repo`.
 */
const removeHost = x => {
  const hostPrefix = 'https://github.com/';
  return x.slice(hostPrefix.length);
};
2018-07-20 03:43:55 -04:00
2018-06-07 03:06:04 -04:00
/**
 * Promise-based sleep.
 * @param {number} ms - milliseconds to wait.
 * @returns {Promise<undefined>} resolves (with no value) once the timer fires.
 */
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
2018-07-19 05:59:07 -04:00
/**
 * Fetches `${API}repos/${pathURL}` and returns the parsed JSON body.
 *
 * Any failure (network error, non-2xx status, unparseable body) is passed to
 * handleFailure exactly once. The status and the repository path are included
 * in the error so a failing entry can be identified.
 *
 * @param {string} pathURL - repository path, e.g. `owner/repo`.
 * @param {object} [opt] - request options merged over the shared `options`.
 * @returns {Promise<object|undefined>} the decoded response body; `undefined`
 *   only if handleFailure returns instead of terminating the process.
 */
const get = async (pathURL, opt) => {
  LOG.debug(`Fetching ${pathURL}`);
  try {
    const response = await fetch(`${API}repos/${pathURL}`, {
      ...options,
      ...opt,
    });
    if (!response.ok) {
      throw new Error(
        `Network response was not ok. (${response.status} ${response.statusText} for ${pathURL})`,
      );
    }
    // Awaited inside the try so that body-parsing errors are handled too.
    return await response.json();
  } catch (err) {
    return handleFailure(err);
  }
};
2018-06-07 03:06:04 -04:00
2018-07-19 05:59:07 -04:00
/**
 * Starts one get() call per entry of `batch` and waits for all of them.
 * Each call receives only the path, not the map index or array.
 * @param {string[]} batch - repository paths.
 * @returns {Promise<Array>} the get() results, in the order of `batch`.
 */
const fetchAll = batch => {
  const pending = batch.map(async pathURL => get(pathURL));
  return Promise.all(pending);
};
2018-06-07 03:06:04 -04:00
/**
 * Collects every `https://github.com/<owner>/<repo>` URL found in `markdown`.
 * @param {string} markdown - text to scan.
 * @returns {string[]} the distinct URLs in order of first appearance; an
 *   empty array when there is no match.
 */
const extractAllRepos = markdown => {
  const repoUrl = /https:\/\/github\.com\/([a-zA-Z0-9-._]+)\/([a-zA-Z0-9-._]+)/g;
  const unique = new Set();
  for (const [url] of markdown.matchAll(repoUrl)) {
    unique.add(url);
  }
  return Array.from(unique);
};
/**
 * Prints a 50-character progress bar on a new draft line.
 *
 * @param {number} i - number of items the bar should represent; printed as-is
 *   after the `#`.
 * @param {number} batchSize - accepted for call-site compatibility; not used.
 * @param {number} total - total number of items.
 * @returns {*} whatever the draft-line updater returns.
 */
const ProgressBar = (i, batchSize, total) => {
  const BAR_WIDTH = 50;
  // An empty workload counts as complete (avoids 0/0 = NaN), and the ratio is
  // clamped so the padding width can never go negative.
  const ratio = total > 0 ? Math.min(Math.max(i / total, 0), 1) : 1;
  const progress = Math.round(ratio * 100);
  const units = Math.round(progress / 2);
  const barLine = console.draft('Starting batch...');
  return barLine(
    `[${'='.repeat(units)}${' '.repeat(BAR_WIDTH - units)}] ${progress}% - # ${i}`,
  );
};
2018-07-21 09:12:59 -04:00
// ----------------------------------------------------------------------------
2018-06-07 03:06:04 -04:00
/**
 * Fetches metadata for every repository URL, BATCH_SIZE at a time, pausing
 * DELAY milliseconds between consecutive batches.
 *
 * @param {string[]} githubRepos - full `https://github.com/owner/repo` URLs.
 * @returns {Promise<Array>} fetched metadata, in the order of `githubRepos`.
 */
async function batchFetchRepoMetadata(githubRepos) {
  const repos = githubRepos.map(removeHost);
  const metadata = [];
  // A non-positive step would keep the loop from ever advancing.
  const step = BATCH_SIZE > 0 ? BATCH_SIZE : 1;
  /* eslint-disable no-await-in-loop */
  for (let i = 0; i < repos.length; i += step) {
    const batch = repos.slice(i, i + step);
    LOG.debug({ batch });
    const res = await fetchAll(batch);
    LOG.debug('batch fetched...');
    metadata.push(...res);
    // Reports the index at which the batch that just finished started.
    ProgressBar(i, step, repos.length);
    // poor man's rate limiting so github don't ban us; nothing follows the
    // final batch, so there is no point sleeping after it.
    if (i + step < repos.length) {
      await delay(DELAY);
    }
  }
  /* eslint-enable no-await-in-loop */
  ProgressBar(repos.length, step, repos.length);
  return metadata;
}
/**
 * Decides whether the stored snapshot is old enough to be refreshed.
 *
 * `lastUpdateTime` is expected to contain a `YYYY-MM-DDTHH` stamp, as in the
 * snapshot file names this script generates. Only the date and hour are
 * considered. A missing, unparseable or invalid stamp means "update now", so a
 * damaged value cannot block updates.
 *
 * @param {string} lastUpdateTime - text holding the previous snapshot's path.
 * @returns {boolean} true when at least INTERVAL INTERVAL_UNITs have passed.
 */
function shouldUpdate(lastUpdateTime) {
  LOG.debug({ lastUpdateTime });
  if (!lastUpdateTime) return true;
  // Locate the stamp by pattern rather than by fixed offsets, so the result
  // does not depend on the length of the folder prefix.
  const stamp = /(\d{4}-\d{2}-\d{2})T(\d{2})/.exec(lastUpdateTime);
  if (!stamp) return true;
  const [, day, hours] = stamp;
  const latestUpdate = dayjs(day).add(Number(hours), 'hour');
  LOG.debug({ latestUpdate: latestUpdate.format() });
  // Comparing against an invalid date yields NaN, which would read as
  // "too recent".
  if (!latestUpdate.isValid()) return true;
  return dayjs().diff(latestUpdate, INTERVAL_UNIT) >= INTERVAL;
}
2018-06-07 03:06:04 -04:00
/**
 * Reads the path of the previous snapshot from LATEST_FILENAME.
 * @returns {Promise<string>} the file content, or '' when the file does not
 *   exist yet (first run).
 */
async function readLastUpdateTime() {
  try {
    return await fs.readFile(LATEST_FILENAME, 'utf8');
  } catch (err) {
    if (err && err.code === 'ENOENT') return '';
    throw err;
  }
}

/**
 * Script entry point.
 *
 * Exits early when the previous snapshot is recent enough. Otherwise it
 * extracts the GitHub repository URLs from the README, writes that list to
 * GITHUB_REPOS, fetches metadata for each repository, writes it to
 * GITHUB_METADATA_FILE and records that path in LATEST_FILENAME. Any error is
 * passed to handleFailure.
 */
async function main() {
  try {
    const lastUpdateTime = await readLastUpdateTime();
    LOG.debug('Checking if updating is needed');
    if (!shouldUpdate(lastUpdateTime)) {
      LOG.debug(
        `Last update was less than ${INTERVAL} ${INTERVAL_UNIT} ago 😅. Exiting...`,
      );
      process.exit();
    }

    const markdown = await fs.readFile(README, 'utf8');
    const githubRepos = extractAllRepos(markdown);
    LOG.debug('writing repo list to disk...');
    await fs.outputJSON(GITHUB_REPOS, githubRepos, { spaces: 2 });

    LOG.debug('fetching data...');
    const metadata = await batchFetchRepoMetadata(githubRepos);

    LOG.debug('writing metadata to disk...');
    await fs.outputJSON(GITHUB_METADATA_FILE, metadata, { spaces: 2 });
    LOG.debug('✅ metadata saved');

    LOG.debug('removing latest...');
    await fs.remove(LATEST_FILENAME);
    LOG.debug('writing latest...');
    await fs.outputFile(LATEST_FILENAME, GITHUB_METADATA_FILE);
    LOG.debug('✅ late update time saved', {
      LATEST_FILENAME,
      GITHUB_METADATA_FILE,
    });

    LOG.debug('gracefully shutting down.');
    process.exit();
  } catch (err) {
    handleFailure(err);
  }
}
// Start the script. main() catches its own errors and hands them to
// handleFailure, so the returned promise is intentionally not awaited here.
main();