Merge pull request #911 from veggiemonk/remove-check-ghrepo

This commit is contained in:
Julien Bisconti 2020-11-11 08:48:02 +01:00 committed by GitHub
commit ace3509726
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 296 additions and 351 deletions

View File

@ -2,6 +2,17 @@
<!-- **Remember that entries are ordered alphabetically** -->
<!--
TLDR:
* all entries sorted alphabetically,
* If it is a paid service, add :heavy_dollar_sign:
* If it is a work in progress (WIP), add :construction:
* clear and short description of the project
* project MUST have: How to setup/install
* project MUST have: How to use (examples)
* we can help you get there :)
-->
# Quality Standards
Note that we can help you achieve those standards. Just try your best and be brave.

View File

@ -25,6 +25,6 @@ jobs:
- name: Install Dependencies
# if: steps.cache.outputs.cache-hit != 'true'
run: npm ci --ignore-scripts --no-audit --no-progress --prefer-offline
- run: npm run test
- run: npm run test-pr
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

1
.gitignore vendored
View File

@ -8,4 +8,5 @@ dist
website/index.html
website/table.html
.idea
**/.DS_Store

1
.npmrc
View File

@ -1 +0,0 @@
save-exact=true

View File

@ -1,4 +1,6 @@
{
"bracketSpacing": true,
"tabWidth": 4,
"semi": true,
"trailingComma": "all",
"singleQuote": true

View File

@ -1,132 +0,0 @@
const fs = require('fs-extra');
const fetch = require('node-fetch');
require('draftlog').into(console);
/**
 * Minimal console logger for this script.
 *
 * - `error` always writes to stderr.
 * - `debug` writes to stdout only when the DEBUG environment variable is set.
 *
 * In both cases the arguments are packed into an index-keyed object
 * ({ 0: first, 1: second, ... }) before being printed.
 */
const LOG = {
    error(...args) {
        console.error(' ERROR', Object.assign({}, args));
    },
    debug(...args) {
        if (!process.env.DEBUG) return;
        console.log('💡 DEBUG: ', Object.assign({}, args));
    },
};
/**
 * Fatal-error handler: reports the failure through LOG.error and then
 * terminates the process with exit code 1.
 *
 * @param {*} failure - the error (or rejection reason) to report
 */
const handleFailure = (failure) => {
    const EXIT_FAILURE = 1;
    LOG.error(failure);
    process.exit(EXIT_FAILURE);
};
// Any promise rejection that nobody handles is routed to handleFailure,
// which logs it and exits with code 1.
process.on('unhandledRejection', handleFailure);
// Without a token the requests cannot be authenticated, so stop right away.
if (!process.env.GITHUB_TOKEN) {
LOG.error('no credentials found.');
process.exit(1);
}
const TOKEN = process.env.GITHUB_TOKEN;
// --- ENV VAR ---
// Each numeric setting falls back to its default when the variable is unset,
// is not a number, or parses to 0 (because of the `||`).
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 10; // repositories requested per batch
const DELAY = parseInt(process.env.DELAY, 10) || 3000; // value passed to delay() between batches
// NOTE(review): INTERVAL and INTERVAL_UNIT are not referenced in the code visible here — confirm they are still needed.
const INTERVAL = parseInt(process.env.INTERVAL, 10) || 1;
const INTERVAL_UNIT = process.env.INTERVAL_UNIT || 'days';
// --- FILES ---
const DATA_FOLDER = 'data';
const README = 'README.md'; // markdown file the repository links are read from
// NOTE(review): LATEST_FILENAME is not referenced in the code visible here — confirm.
const LATEST_FILENAME = `${DATA_FOLDER}/latest`;
const GITHUB_REPOS = `${DATA_FOLDER}/repository.json`; // where main() writes the repository list
// Value of the Authorization header sent with every API request.
const Authorization = `token ${TOKEN}`;
// --- HTTP ---
const API = 'https://api.github.com/';
// Base fetch options; get() merges per-call overrides on top of these.
const options = {
method: 'GET',
headers: {
'User-Agent': 'awesome-docker script listing',
'Content-Type': 'application/json',
Authorization,
},
};
// ----------------------------------------------------------------------------
/**
 * Drops the first 'https://github.com/'.length characters of a URL, leaving
 * the 'owner/name' part. The prefix itself is not verified.
 *
 * @param {string} x - repository URL
 * @returns {string} the remainder after the host prefix
 */
const removeHost = (x) => {
    const hostPrefix = 'https://github.com/';
    return x.substring(hostPrefix.length);
};
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - time to wait
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((done) => setTimeout(done, ms));
/**
 * Fetches `${API}repos/${pathURL}` and resolves with the parsed JSON body.
 *
 * Request options are the module-level `options` with `opt` merged on top.
 * A failed request or a non-ok response is handed to handleFailure.
 *
 * @param {string} pathURL - 'owner/name' path of the repository
 * @param {object} [opt] - per-call fetch option overrides
 * @returns {Promise<*>} parsed response body
 */
const get = (pathURL, opt) => {
    LOG.debug(`Fetching ${pathURL}`);
    const url = `${API}repos/${pathURL}`;
    const requestOptions = Object.assign({}, options, opt);
    const parseBody = (response) => {
        if (!response.ok) {
            throw new Error('Network response was not ok.');
        }
        return response.json();
    };
    return fetch(url, requestOptions)
        .catch(handleFailure)
        .then(parseBody)
        .catch(handleFailure);
};
/**
 * Starts a get() request for every path in the batch and waits for all of them.
 *
 * @param {string[]} batch - 'owner/name' repository paths
 * @returns {Promise<Array>} results in the same order as `batch`
 */
const fetchAll = (batch) => {
    const pending = batch.map(async (repoPath) => get(repoPath));
    return Promise.all(pending);
};
/**
 * Extracts every URL-like substring from a markdown document.
 *
 * `String.prototype.match` with a global regex returns `null` when nothing
 * matches; that case is normalised to an empty array so callers can always
 * treat the result as a list.
 *
 * @param {string} markdown - document text to scan
 * @returns {string[]} matched links in document order (empty when none)
 */
const extractAllLinks = (markdown) => {
    const re = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/g;
    return markdown.match(re) || [];
};
/**
 * Collects the distinct 'https://github.com/<owner>/<name>' URLs found in the
 * markdown text, keeping first-seen order.
 *
 * @param {string} markdown - document text to scan
 * @returns {string[]} unique repository URLs (empty when none are found)
 */
const extractAllRepos = (markdown) => {
    const repoUrlPattern = /https:\/\/github\.com\/([a-zA-Z0-9-._]+)\/([a-zA-Z0-9-._]+)/g;
    const uniqueUrls = new Set(markdown.match(repoUrlPattern));
    return Array.from(uniqueUrls);
};
/**
 * Draws a 50-character progress bar through console.draft.
 *
 * @param {number} i - number of items processed so far
 * @param {number} batchSize - accepted but not used by the rendering
 * @param {number} total - total number of items
 * @returns {*} whatever the draft updater returns
 */
const ProgressBar = (i, batchSize, total) => {
    const percent = Math.round((i / total) * 100);
    const filled = Math.round(percent / 2); // one bar character per 2%
    const updateLine = console.draft('Starting batch...');
    const bar = '='.repeat(filled) + ' '.repeat(50 - filled);
    return updateLine(`[${bar}] ${percent}% - # ${i}`);
};
// ----------------------------------------------------------------------------
/**
 * Fetches metadata for every repository URL, BATCH_SIZE at a time.
 *
 * After each batch the progress bar is updated and the function waits DELAY
 * before starting the next one.
 *
 * @param {string[]} githubRepos - full repository URLs
 * @returns {Promise<Array>} the fetched results, in input order
 */
async function batchFetchRepoMetadata(githubRepos) {
    const repoPaths = githubRepos.map(removeHost);
    const collected = [];
    let offset = 0;
    /* eslint-disable no-await-in-loop */
    while (offset < repoPaths.length) {
        const batch = repoPaths.slice(offset, offset + BATCH_SIZE);
        LOG.debug({ batch });
        const results = await fetchAll(batch);
        LOG.debug('batch fetched...');
        collected.push(...results);
        ProgressBar(offset, BATCH_SIZE, repoPaths.length);
        // poor man's rate limiting so github doesn't ban us
        await delay(DELAY);
        offset += BATCH_SIZE;
    }
    ProgressBar(repoPaths.length, BATCH_SIZE, repoPaths.length);
    return collected;
}
/**
 * Script entry point.
 *
 * Reads the README, writes the list of GitHub repository URLs found in it to
 * GITHUB_REPOS, fetches metadata for each repository, then exits. Any error
 * is handed to handleFailure.
 *
 * The previous version also computed `links` and kept the fetched `metadata`
 * in a local, but neither value was ever used; both locals were removed.
 */
async function main() {
    try {
        const markdown = await fs.readFile(README, 'utf8');
        const githubRepos = extractAllRepos(markdown);
        LOG.debug('writing repo list to disk...');
        await fs.outputJSON(GITHUB_REPOS, githubRepos, { spaces: 2 });
        LOG.debug('fetching data...');
        await batchFetchRepoMetadata(githubRepos);
        LOG.debug('gracefully shutting down.');
        process.exit();
    } catch (err) {
        handleFailure(err);
    }
}
main();

View File

@ -5,7 +5,8 @@
"main": "build.js",
"scripts": {
"build": "rimraf ./dist/ && node build.js",
"test": "node pull_request.js"
"test-pr": "node tests/pull_request.js",
"test": "node tests/test_all.js"
},
"repository": {
"type": "git",

View File

@ -1,216 +0,0 @@
const fs = require('fs-extra');
const fetch = require('node-fetch');
const exclude = require('./exclude_in_test.json');
/**
 * Always throws; used as the right-hand side of `process.env.X || ...` so a
 * missing variable aborts the script with a clear message.
 *
 * @param {string} variable_name - name of the missing environment variable
 * @throws {Error} always
 */
function envvar_undefined(variable_name) {
    const message = `${variable_name} must be defined`;
    throw new Error(message);
}
// Print whether debug logging is enabled for this run.
console.log({
DEBUG: process.env.DEBUG || false,
});
// Markdown file whose links are checked.
const README = 'README.md';
const GITHUB_GQL_API = 'https://api.github.com/graphql';
// Throws while the module loads when GITHUB_TOKEN is unset or empty.
const TOKEN = process.env.GITHUB_TOKEN || envvar_undefined('GITHUB_TOKEN');
// Options passed to fetch() for every link check (see fetch_link).
const LINKS_OPTIONS = {
// NOTE(review): presumably makes fetch reject on any redirect, so redirected links are reported as failures — confirm against the node-fetch docs
redirect: 'error',
headers: {
'Content-Type': 'application/json',
// Browser-like user agent string.
// NOTE(review): presumably because some sites reject unknown clients — confirm
'user-agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
},
};
// Value of the Authorization header sent with GitHub GraphQL requests.
const Authorization = `token ${TOKEN}`;
/**
 * Builds the fetch options for a POST request carrying a GraphQL query.
 *
 * @param {string} query - GraphQL query text
 * @returns {{method: string, headers: object, body: string}} fetch options
 *          whose body is the JSON document `{ query }`
 */
const make_GQL_options = (query) => {
    const headers = {
        Authorization,
        'Content-Type': 'application/json',
        'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    };
    const body = JSON.stringify({ query });
    return { method: 'POST', headers, body };
};
/**
 * Console logger used by the link checker.
 *
 * - `error` prints the raw argument array to stderr.
 * - `error_string` prints the arguments as an indented JSON string to stderr.
 * - `debug` / `debug_string` do the same on stdout, but only when the DEBUG
 *   environment variable is set.
 *
 * Except for `error`, arguments are first packed into an index-keyed object
 * ({ 0: first, 1: second, ... }).
 */
const LOG = {
    error(...args) {
        return console.error('❌ ERROR', args);
    },
    error_string(...args) {
        const packed = Object.assign({}, args);
        return console.error('❌ ERROR', JSON.stringify(packed, null, ' '));
    },
    debug(...args) {
        if (!process.env.DEBUG) return;
        console.log('>>> DEBUG: ', Object.assign({}, args));
    },
    debug_string(...args) {
        if (!process.env.DEBUG) return;
        const packed = Object.assign({}, args);
        console.log('>>> DEBUG: ', JSON.stringify(packed, null, ' '));
    },
};
/**
 * Fatal-error handler: prints the error's message and stack (plus the error
 * object itself) to stderr, then exits with code 1.
 *
 * @param {Error} error - the failure to report
 */
const handleFailure = (error) => {
    const { message, stack } = error;
    console.error(`${message}: ${stack}`, { error });
    process.exit(1);
};
// Unhandled promise rejections are treated as fatal.
process.on('unhandledRejection', handleFailure);
/**
 * Extracts every https / www / user@host style link from a markdown document.
 *
 * `String.prototype.match` with a global regex returns `null` when nothing
 * matches; that case is normalised to an empty array so callers can chain
 * array methods (e.g. `.filter`) on the result without crashing.
 *
 * @param {string} markdown - document text to scan
 * @returns {string[]} matched links in document order (empty when none)
 */
const extract_all_links = (markdown) => {
    // if you have a problem and you try to solve it with a regex,
    // now you have two problems
    // TODO: replace this mess with a markdown parser ?
    const re = /(((https:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%/.\w\-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[.!/@\-\\\w]*))?)/g;
    return markdown.match(re) || [];
};
/**
 * Returns every element that has already appeared earlier in the array.
 * An element occurring n times is reported n - 1 times.
 *
 * A Set is used for the "seen" bookkeeping instead of a plain object: with an
 * object, values such as 'constructor' or 'toString' hit inherited
 * Object.prototype properties and were reported as duplicates on their first
 * occurrence.
 *
 * @param {string[]} arr - values to inspect
 * @returns {string[]} repeated values, in the order the repeats occur
 */
const find_duplicates = (arr) => {
    const seen = new Set();
    const dup = [];
    arr.forEach((e) => {
        if (seen.has(e)) dup.push(e);
        else seen.add(e);
    });
    return dup;
};
/**
 * Splits an array in two according to a predicate.
 *
 * @param {Array} arr - values to split
 * @param {function(*): *} func - predicate called with each element
 * @returns {[Array, Array]} [elements where func was truthy, the rest],
 *          each in original order
 */
const partition = (arr, func) =>
    arr.reduce(
        (groups, item) => {
            const target = func(item) ? groups[0] : groups[1];
            target.push(item);
            return groups;
        },
        [[], []],
    );
/**
 * Requests a URL with LINKS_OPTIONS and summarises the outcome.
 * Never rejects: a failed request is reported as `ok: false` with the error
 * message as status.
 *
 * @param {string} url - link to check
 * @returns {Promise<[string, {ok: boolean, status: string, redirected?: boolean}]>}
 *          the url paired with its result
 */
async function fetch_link(url) {
    let outcome;
    try {
        const response = await fetch(url, LINKS_OPTIONS);
        const { ok, statusText: status, redirected } = response;
        outcome = { ok, status, redirected };
    } catch (failure) {
        outcome = { ok: false, status: failure.message };
    }
    return [url, outcome];
}
/**
 * Runs `get` over `arr` in consecutive batches: the items of one batch are
 * started together and the next batch begins only once all of them settle.
 *
 * Changes from the previous version:
 * - the progress line reports the number of items actually processed, capped
 *   at `arr.length` (it used to overshoot on the last, partial batch);
 * - a BATCH_SIZE that is not a positive number is rejected up front. 0 or a
 *   negative value never advanced the loop, and NaN silently produced an
 *   empty result.
 *
 * @param {object} params
 * @param {Array} params.arr - items to process
 * @param {function} params.get - called via `Array.prototype.map` for each
 *        item; may return a promise
 * @param {function} [params.post_filter_func] - optional predicate applied to
 *        each batch's results; only matching results are kept
 * @param {number} [params.BATCH_SIZE=8] - number of items per batch
 * @returns {Promise<Array>} the (optionally filtered) results in input order
 * @throws {RangeError} (as a rejection) when BATCH_SIZE is not a positive number
 */
async function batch_fetch({ arr, get, post_filter_func, BATCH_SIZE = 8 }) {
    if (!(BATCH_SIZE > 0)) {
        throw new RangeError(
            `BATCH_SIZE must be a positive number, got ${BATCH_SIZE}`,
        );
    }
    const result = [];
    /* eslint-disable no-await-in-loop */
    for (let i = 0; i < arr.length; i += BATCH_SIZE) {
        const batch = arr.slice(i, i + BATCH_SIZE);
        LOG.debug_string({ batch });
        let res = await Promise.all(batch.map(get));
        // Cap the counter so the final partial batch does not report more
        // items than exist.
        console.log(`batch fetched...${Math.min(i + BATCH_SIZE, arr.length)}`);
        res = post_filter_func ? res.filter(post_filter_func) : res;
        LOG.debug_string({ res });
        result.push(...res);
    }
    return result;
}
/**
 * Turns GitHub links into [owner, name] pairs.
 *
 * The first 'https://github.com/'.length characters are dropped and the rest
 * is split on '/'. Only links with exactly two segments and a non-empty
 * second segment are kept.
 *
 * @param {string[]} arr - GitHub URLs
 * @returns {string[][]} [owner, name] pairs in input order
 */
const extract_repos = (arr) => {
    const prefix_length = 'https://github.com/'.length;
    const repos = [];
    arr.forEach((link) => {
        const segments = link.slice(prefix_length).split('/');
        if (segments.length === 2 && segments[1] !== '') {
            repos.push(segments);
        }
    });
    return repos;
};
/**
 * Builds one GraphQL query that looks up every repository in `arr`.
 *
 * Each repository gets an aliased `repository(...)` field; the alias is
 * `repo_<owner>_<name>` with every '-' and '.' replaced by '_'.
 *
 * @param {string[][]} arr - [owner, name] pairs
 * @returns {string} the query text
 */
const generate_GQL_query = (arr) => {
    const to_alias = (text) => text.replace(/(-|\.)/g, '_');
    const fields = arr.map(
        ([owner, name]) =>
            `repo_${to_alias(owner)}_${to_alias(
                name,
            )}: repository(owner: "${owner}", name:"${name}"){ nameWithOwner } `,
    );
    return `query AWESOME_REPOS{ ${fields.join('')} }`;
};
// =============================================================
// const batch_github_repos = async (github_links) => {
// const BATCH_SIZE = 50;
// const repos = extract_repos(github_links);
// for (let i = 0; i < repos.length; i += BATCH_SIZE) {
// const batch = repos.slice(i, i + BATCH_SIZE);
// const query = generate_GQL_query(batch);
// LOG.debug({ query });
// const gql_response = await fetch(
// 'https://api.github.com/graphql',
// make_GQL_options(query),
// ).then((r) => r.json());
// LOG.debug({ gql_response });
// }
// };
// =============================================================
// Number of exclusion prefixes, read once when the module loads.
const exclude_length = exclude.length;
/**
 * Tells whether a link starts with one of the prefixes in the exclusion list.
 *
 * @param {string} link - URL to test
 * @returns {boolean} true when the link must be skipped
 */
const exclude_from_list = (link) => {
    for (let i = 0; i < exclude_length; i += 1) {
        if (link.startsWith(exclude[i])) {
            return true;
        }
    }
    return false;
};
/**
 * Entry point of the link checker.
 *
 * Reads the README, drops excluded links, then:
 *  1. reports duplicated links,
 *  2. requests every non-GitHub link and reports the ones that fail,
 *  3. asks the GitHub GraphQL API about every repository link and reports
 *     the errors it returns.
 * Prints a summary and exits with code 1 when any check recorded an error.
 *
 * Fix: the summary used to print `TEST_PASSED: has_error.show`, i.e. `true`
 * exactly when the run FAILED. The flag is now negated.
 */
async function main() {
    const has_error = {
        show: false,
        duplicates: '',
        other_links_error: '',
        github_repos: '',
    };
    const markdown = await fs.readFile(README, 'utf8');
    let links = extract_all_links(markdown);
    links = links.filter((l) => !exclude_from_list(l)); // exclude websites
    LOG.debug_string({ links });
    console.log(`total links to check ${links.length}`);
    console.log('checking for duplicates links...');
    const duplicates = find_duplicates(links);
    if (duplicates.length > 0) {
        has_error.show = true;
        has_error.duplicates = duplicates;
    }
    LOG.debug_string({ duplicates });
    const [github_links, external_links] = partition(links, (link) =>
        link.startsWith('https://github.com'),
    );
    console.log(`checking ${external_links.length} external links...`);
    const external_links_error = await batch_fetch({
        arr: external_links,
        get: fetch_link,
        // keep only the links whose request did not succeed
        post_filter_func: (x) => !x[1].ok,
        BATCH_SIZE: 8,
    });
    if (external_links_error.length > 0) {
        has_error.show = true;
        has_error.other_links_error = external_links_error;
    }
    console.log(`checking ${github_links.length} GitHub repositories...`);
    const repos = extract_repos(github_links);
    const query = generate_GQL_query(repos);
    const options = make_GQL_options(query);
    const gql_response = await fetch(GITHUB_GQL_API, options).then((r) =>
        r.json(),
    );
    if (gql_response.errors) {
        has_error.show = true;
        has_error.github_repos = gql_response.errors;
    }
    console.log({
        // passed means that no check recorded an error
        TEST_PASSED: !has_error.show,
        GITHUB_REPOSITORY: github_links.length,
        EXTERNAL_LINKS: external_links.length,
    });
    if (has_error.show) {
        LOG.error_string(has_error);
        process.exit(1);
    }
}
console.log('starting...');
main();

102
tests/common.js Normal file
View File

@ -0,0 +1,102 @@
const fetch = require('node-fetch');
const exclude = require('./exclude_in_test.json');
// Options passed to fetch() for every link check (see fetch_link).
const LINKS_OPTIONS = {
// NOTE(review): presumably makes fetch reject on any redirect, so redirected links are reported as failures — confirm against the node-fetch docs
redirect: 'error',
headers: {
'Content-Type': 'application/json',
// Browser-like user agent string.
// NOTE(review): presumably because some sites reject unknown clients — confirm
'user-agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
},
};
/**
 * Console logger shared by the test scripts.
 *
 * - `error` prints the raw argument array to stderr.
 * - `error_string` prints the arguments as an indented JSON string to stderr.
 * - `debug` / `debug_string` do the same on stdout, but only when the DEBUG
 *   environment variable is set.
 *
 * Except for `error`, arguments are first packed into an index-keyed object
 * ({ 0: first, 1: second, ... }).
 */
const LOG = {
    error(...args) {
        return console.error('❌ ERROR', args);
    },
    error_string(...args) {
        const packed = Object.assign({}, args);
        return console.error('❌ ERROR', JSON.stringify(packed, null, ' '));
    },
    debug(...args) {
        if (!process.env.DEBUG) return;
        console.log('>>> DEBUG: ', Object.assign({}, args));
    },
    debug_string(...args) {
        if (!process.env.DEBUG) return;
        const packed = Object.assign({}, args);
        console.log('>>> DEBUG: ', JSON.stringify(packed, null, ' '));
    },
};
/**
 * Fatal-error handler: prints the error's message and stack (plus the error
 * object itself) to stderr, then exits with code 1.
 *
 * @param {Error} error - the failure to report
 */
const handleFailure = (error) => {
    const { message, stack } = error;
    console.error(`${message}: ${stack}`, { error });
    process.exit(1);
};
// Unhandled promise rejections are treated as fatal.
process.on('unhandledRejection', handleFailure);
/**
 * Extracts every https / www / user@host style link from a markdown document.
 *
 * `String.prototype.match` with a global regex returns `null` when nothing
 * matches; that case is normalised to an empty array so callers can chain
 * array methods (e.g. `.filter`) on the result without crashing.
 *
 * @param {string} markdown - document text to scan
 * @returns {string[]} matched links in document order (empty when none)
 */
const extract_all_links = (markdown) => {
    // if you have a problem and you try to solve it with a regex,
    // now you have two problems
    // TODO: replace this mess with a markdown parser ?
    const re = /(((https:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%/.\w\-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[.!/@\-\\\w]*))?)/g;
    return markdown.match(re) || [];
};
/**
 * Returns every element that has already appeared earlier in the array.
 * An element occurring n times is reported n - 1 times.
 *
 * A Set is used for the "seen" bookkeeping instead of a plain object: with an
 * object, values such as 'constructor' or 'toString' hit inherited
 * Object.prototype properties and were reported as duplicates on their first
 * occurrence.
 *
 * @param {string[]} arr - values to inspect
 * @returns {string[]} repeated values, in the order the repeats occur
 */
const find_duplicates = (arr) => {
    const seen = new Set();
    const dup = [];
    arr.forEach((e) => {
        if (seen.has(e)) dup.push(e);
        else seen.add(e);
    });
    return dup;
};
/**
 * Splits an array in two according to a predicate.
 *
 * @param {Array} arr - values to split
 * @param {function(*): *} func - predicate called with each element
 * @returns {[Array, Array]} [elements where func was truthy, the rest],
 *          each in original order
 */
const partition = (arr, func) =>
    arr.reduce(
        (groups, item) => {
            const target = func(item) ? groups[0] : groups[1];
            target.push(item);
            return groups;
        },
        [[], []],
    );
/**
 * Requests a URL with LINKS_OPTIONS and summarises the outcome.
 * Never rejects: a failed request is reported as `ok: false` with the error
 * message as status.
 *
 * @param {string} url - link to check
 * @returns {Promise<[string, {ok: boolean, status: string, redirected?: boolean}]>}
 *          the url paired with its result
 */
async function fetch_link(url) {
    let outcome;
    try {
        const response = await fetch(url, LINKS_OPTIONS);
        const { ok, statusText: status, redirected } = response;
        outcome = { ok, status, redirected };
    } catch (failure) {
        outcome = { ok: false, status: failure.message };
    }
    return [url, outcome];
}
/**
 * Runs `get` over `arr` in consecutive batches: the items of one batch are
 * started together and the next batch begins only once all of them settle.
 *
 * Changes from the previous version:
 * - the progress line reports the number of items actually processed, capped
 *   at `arr.length` (it used to overshoot on the last, partial batch);
 * - a BATCH_SIZE that is not a positive number is rejected up front. 0 or a
 *   negative value never advanced the loop, and NaN silently produced an
 *   empty result.
 *
 * @param {object} params
 * @param {Array} params.arr - items to process
 * @param {function} params.get - called via `Array.prototype.map` for each
 *        item; may return a promise
 * @param {function} [params.post_filter_func] - optional predicate applied to
 *        each batch's results; only matching results are kept
 * @param {number} [params.BATCH_SIZE=8] - number of items per batch
 * @returns {Promise<Array>} the (optionally filtered) results in input order
 * @throws {RangeError} (as a rejection) when BATCH_SIZE is not a positive number
 */
async function batch_fetch({ arr, get, post_filter_func, BATCH_SIZE = 8 }) {
    if (!(BATCH_SIZE > 0)) {
        throw new RangeError(
            `BATCH_SIZE must be a positive number, got ${BATCH_SIZE}`,
        );
    }
    const result = [];
    /* eslint-disable no-await-in-loop */
    for (let i = 0; i < arr.length; i += BATCH_SIZE) {
        const batch = arr.slice(i, i + BATCH_SIZE);
        LOG.debug_string({ batch });
        let res = await Promise.all(batch.map(get));
        // Cap the counter so the final partial batch does not report more
        // items than exist.
        console.log(`batch fetched...${Math.min(i + BATCH_SIZE, arr.length)}`);
        res = post_filter_func ? res.filter(post_filter_func) : res;
        LOG.debug_string({ res });
        result.push(...res);
    }
    return result;
}
// Number of exclusion prefixes, read once when the module loads.
const exclude_length = exclude.length;
/**
 * Tells whether a link starts with one of the prefixes in the exclusion list.
 *
 * @param {string} link - URL to test
 * @returns {boolean} true when the link must be skipped
 */
const exclude_from_list = (link) => {
    for (let i = 0; i < exclude_length; i += 1) {
        if (link.startsWith(exclude[i])) {
            return true;
        }
    }
    return false;
};
// Helpers made available to the scripts that require this module.
module.exports = {
LOG,
handleFailure,
extract_all_links,
find_duplicates,
partition,
fetch_link,
batch_fetch,
exclude_from_list,
};

66
tests/pull_request.js Normal file
View File

@ -0,0 +1,66 @@
const fs = require('fs-extra');
const helper = require('./common');
// Print whether debug logging is enabled for this run.
console.log({
DEBUG: process.env.DEBUG || false,
});
// Markdown file whose links are checked.
const README = 'README.md';
/**
 * Entry point of the pull-request check.
 *
 * Reads the README, drops excluded links, reports duplicated links, and
 * requests every non-GitHub link to report the ones that fail. GitHub
 * repository links are only counted, not checked. Prints a summary and exits
 * with code 1 when any check recorded an error.
 */
async function main() {
    const has_error = {
        show: false,
        duplicates: '',
        other_links_error: '',
    };
    const readme = await fs.readFile(README, 'utf8');
    const links = helper
        .extract_all_links(readme)
        .filter((link) => !helper.exclude_from_list(link)); // exclude websites
    helper.LOG.debug_string({ links });
    console.log(`total links to check ${links.length}`);

    console.log('checking for duplicates links...');
    const duplicates = helper.find_duplicates(links);
    const has_duplicates = duplicates.length > 0;
    if (has_duplicates) {
        has_error.show = true;
        has_error.duplicates = duplicates;
    }
    helper.LOG.debug_string({ duplicates });

    const is_github_link = (link) => link.startsWith('https://github.com');
    const [github_links, external_links] = helper.partition(
        links,
        is_github_link,
    );

    console.log(`checking ${external_links.length} external links...`);
    const external_links_error = await helper.batch_fetch({
        arr: external_links,
        get: helper.fetch_link,
        // keep only the links whose request did not succeed
        post_filter_func: (entry) => !entry[1].ok,
        BATCH_SIZE: 8,
    });
    if (external_links_error.length > 0) {
        has_error.show = true;
        has_error.other_links_error = external_links_error;
    }

    console.log(`checking ${github_links.length} GitHub repositories...`);
    console.log(
        `skipping GitHub repository check. Run "npm run test" to execute them manually.`,
    );
    console.log({
        TEST_PASSED: !has_error.show,
        EXTERNAL_LINKS: external_links.length,
    });
    if (!has_error.show) {
        return;
    }
    helper.LOG.error_string(has_error);
    process.exit(1);
}
console.log('starting...');
main();

111
tests/test_all.js Normal file
View File

@ -0,0 +1,111 @@
const fs = require('fs-extra');
const fetch = require('node-fetch');
const helper = require('./common');
/**
 * Always throws; used as the right-hand side of `process.env.X || ...` so a
 * missing variable aborts the script with a clear message.
 *
 * @param {string} variable_name - name of the missing environment variable
 * @throws {Error} always
 */
function envvar_undefined(variable_name) {
    const message = `${variable_name} must be defined`;
    throw new Error(message);
}
// Print whether debug logging is enabled for this run.
console.log({
DEBUG: process.env.DEBUG || false,
});
// Markdown file whose links are checked.
const README = 'README.md';
const GITHUB_GQL_API = 'https://api.github.com/graphql';
// Throws while the module loads when GITHUB_TOKEN is unset or empty.
const TOKEN = process.env.GITHUB_TOKEN || envvar_undefined('GITHUB_TOKEN');
// Value of the Authorization header sent with GitHub GraphQL requests.
const Authorization = `token ${TOKEN}`;
/**
 * Builds the fetch options for a POST request carrying a GraphQL query.
 *
 * @param {string} query - GraphQL query text
 * @returns {{method: string, headers: object, body: string}} fetch options
 *          whose body is the JSON document `{ query }`
 */
const make_GQL_options = (query) => {
    const headers = {
        Authorization,
        'Content-Type': 'application/json',
        'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    };
    const body = JSON.stringify({ query });
    return { method: 'POST', headers, body };
};
/**
 * Turns GitHub links into [owner, name] pairs.
 *
 * The first 'https://github.com/'.length characters are dropped and the rest
 * is split on '/'. Only links with exactly two segments and a non-empty
 * second segment are kept.
 *
 * @param {string[]} arr - GitHub URLs
 * @returns {string[][]} [owner, name] pairs in input order
 */
const extract_repos = (arr) => {
    const prefix_length = 'https://github.com/'.length;
    const repos = [];
    arr.forEach((link) => {
        const segments = link.slice(prefix_length).split('/');
        if (segments.length === 2 && segments[1] !== '') {
            repos.push(segments);
        }
    });
    return repos;
};
/**
 * Builds one GraphQL query that looks up every repository in `arr`.
 *
 * Each repository gets an aliased `repository(...)` field; the alias is
 * `repo_<owner>_<name>` with every '-' and '.' replaced by '_'.
 *
 * @param {string[][]} arr - [owner, name] pairs
 * @returns {string} the query text
 */
const generate_GQL_query = (arr) => {
    const to_alias = (text) => text.replace(/(-|\.)/g, '_');
    const fields = arr.map(
        ([owner, name]) =>
            `repo_${to_alias(owner)}_${to_alias(
                name,
            )}: repository(owner: "${owner}", name:"${name}"){ nameWithOwner } `,
    );
    return `query AWESOME_REPOS{ ${fields.join('')} }`;
};
/**
 * Entry point of the full check.
 *
 * Reads the README, drops excluded links, then:
 *  1. reports duplicated links,
 *  2. requests every non-GitHub link and reports the ones that fail,
 *  3. asks the GitHub GraphQL API about every repository link and reports
 *     the errors it returns.
 * Prints a summary and exits with code 1 when any check recorded an error.
 *
 * Fix: the summary used to print `TEST_PASSED: has_error.show`, i.e. `true`
 * exactly when the run FAILED. The flag is now negated.
 */
async function main() {
    const has_error = {
        show: false,
        duplicates: '',
        other_links_error: '',
        github_repos: '',
    };
    const markdown = await fs.readFile(README, 'utf8');
    let links = helper.extract_all_links(markdown);
    links = links.filter((l) => !helper.exclude_from_list(l)); // exclude websites
    helper.LOG.debug_string({ links });
    console.log(`total links to check ${links.length}`);
    console.log('checking for duplicates links...');
    const duplicates = helper.find_duplicates(links);
    if (duplicates.length > 0) {
        has_error.show = true;
        has_error.duplicates = duplicates;
    }
    helper.LOG.debug_string({ duplicates });
    const [github_links, external_links] = helper.partition(links, (link) =>
        link.startsWith('https://github.com'),
    );
    console.log(`checking ${external_links.length} external links...`);
    const external_links_error = await helper.batch_fetch({
        arr: external_links,
        get: helper.fetch_link,
        // keep only the links whose request did not succeed
        post_filter_func: (x) => !x[1].ok,
        BATCH_SIZE: 8,
    });
    if (external_links_error.length > 0) {
        has_error.show = true;
        has_error.other_links_error = external_links_error;
    }
    console.log(`checking ${github_links.length} GitHub repositories...`);
    const repos = extract_repos(github_links);
    const query = generate_GQL_query(repos);
    const options = make_GQL_options(query);
    const gql_response = await fetch(GITHUB_GQL_API, options).then((r) =>
        r.json(),
    );
    if (gql_response.errors) {
        has_error.show = true;
        has_error.github_repos = gql_response.errors;
    }
    console.log({
        // passed means that no check recorded an error
        TEST_PASSED: !has_error.show,
        GITHUB_REPOSITORY: github_links.length,
        EXTERNAL_LINKS: external_links.length,
    });
    if (has_error.show) {
        helper.LOG.error_string(has_error);
        process.exit(1);
    }
}
console.log('starting...');
main();