Merge pull request #911 from veggiemonk/remove-check-ghrepo

This commit is contained in:
Julien Bisconti 2020-11-11 08:48:02 +01:00 committed by GitHub
commit ace3509726
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 296 additions and 351 deletions

View File

@ -2,6 +2,17 @@
<!-- **Remember that entries are ordered alphabetically** --> <!-- **Remember that entries are ordered alphabetically** -->
<!--
TLDR:
* all entries sorted alphabetically,
* If paying service add :heavy_dollar_sign:
* If WIP add :construction:
* clear and short description of the project
* project MUST have: How to setup/install
* project MUST have: How to use (examples)
* we can help you get there :)
-->
# Quality Standards # Quality Standards
Note that we can help you achieve those standards, just try your best and be brave. Note that we can help you achieve those standards, just try your best and be brave.

View File

@ -25,6 +25,6 @@ jobs:
- name: Install Dependencies - name: Install Dependencies
# if: steps.cache.outputs.cache-hit != 'true' # if: steps.cache.outputs.cache-hit != 'true'
run: npm ci --ignore-scripts --no-audit --no-progress --prefer-offline run: npm ci --ignore-scripts --no-audit --no-progress --prefer-offline
- run: npm run test - run: npm run test-pr
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

1
.gitignore vendored
View File

@ -8,4 +8,5 @@ dist
website/index.html website/index.html
website/table.html website/table.html
.idea
**/.DS_Store **/.DS_Store

1
.npmrc
View File

@ -1 +0,0 @@
save-exact=true

View File

@ -1,4 +1,6 @@
{ {
"bracketSpacing": true,
"tabWidth": 4,
"semi": true, "semi": true,
"trailingComma": "all", "trailingComma": "all",
"singleQuote": true "singleQuote": true

View File

@ -1,132 +0,0 @@
// Dependencies: fs-extra (promise-based fs), node-fetch (HTTP),
// draftlog (lets a console line be re-drawn, used for the progress bar).
const fs = require('fs-extra');
const fetch = require('node-fetch');
require('draftlog').into(console);

// Minimal logger; debug output is emitted only when DEBUG env var is set.
const LOG = {
    error: (...args) => console.error(' ERROR', { ...args }),
    debug: (...args) => {
        if (process.env.DEBUG) console.log('💡 DEBUG: ', { ...args });
    },
};

// Any fatal error aborts the whole process with exit code 1.
const handleFailure = (err) => {
    LOG.error(err);
    process.exit(1);
};
process.on('unhandledRejection', handleFailure);

// A GitHub token is mandatory: every request below hits the GitHub REST API.
if (!process.env.GITHUB_TOKEN) {
    LOG.error('no credentials found.');
    process.exit(1);
}
const TOKEN = process.env.GITHUB_TOKEN;

// --- ENV VAR ---
// Tunables with defaults: repos fetched per batch, pause between batches (ms).
// INTERVAL / INTERVAL_UNIT are not referenced in this chunk — TODO confirm use.
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 10;
const DELAY = parseInt(process.env.DELAY, 10) || 3000;
const INTERVAL = parseInt(process.env.INTERVAL, 10) || 1;
const INTERVAL_UNIT = process.env.INTERVAL_UNIT || 'days';

// --- FILES ---
const DATA_FOLDER = 'data';
const README = 'README.md';
// LATEST_FILENAME is not referenced in this chunk — TODO confirm use.
const LATEST_FILENAME = `${DATA_FOLDER}/latest`;
const GITHUB_REPOS = `${DATA_FOLDER}/repository.json`;
const Authorization = `token ${TOKEN}`;

// --- HTTP ---
const API = 'https://api.github.com/';
const options = {
    method: 'GET',
    headers: {
        'User-Agent': 'awesome-docker script listing',
        'Content-Type': 'application/json',
        Authorization,
    },
};
// ----------------------------------------------------------------------------
// Strip the leading "https://github.com/" from a repository URL,
// leaving the "owner/name" path.
const removeHost = (x) => {
    const prefix = 'https://github.com/';
    return x.slice(prefix.length, x.length);
};

// Resolve after `ms` milliseconds (poor man's sleep).
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// GET one repository ("owner/name") from the GitHub REST API and parse
// the JSON body. Any network or HTTP failure aborts the process via
// handleFailure.
const get = (pathURL, opt) => {
    LOG.debug(`Fetching ${pathURL}`);
    const requestOptions = { ...options, ...opt };
    return fetch(`${API}repos/${pathURL}`, requestOptions)
        .catch(handleFailure)
        .then((response) => {
            if (!response.ok) throw new Error('Network response was not ok.');
            return response.json();
        })
        .catch(handleFailure);
};
// Fetch a whole batch of repo paths in parallel.
// (The original wrapped `get` in a pointless `async` arrow; a plain
// arrow returning the promise is equivalent.)
const fetchAll = (batch) => Promise.all(batch.map((pathURL) => get(pathURL)));

// Extract every URL-looking token from the README markdown.
// Returns [] (not null) when nothing matches, so callers can chain
// array methods safely.
const extractAllLinks = (markdown) => {
    const re = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/g;
    return markdown.match(re) || [];
};

// Extract the unique GitHub repository URLs from the markdown.
// Guards against `match` returning null (no links at all), which would
// previously have thrown on the spread.
const extractAllRepos = (markdown) => {
    const re = /https:\/\/github\.com\/([a-zA-Z0-9-._]+)\/([a-zA-Z0-9-._]+)/g;
    const md = markdown.match(re);
    return md ? [...new Set(md)] : [];
};

// Draw a console progress bar: 50 chars wide, one '=' per 2%.
// `batchSize` is accepted for interface compatibility but unused.
const ProgressBar = (i, batchSize, total) => {
    const progress = Math.round((i / total) * 100);
    const units = Math.round(progress / 2);
    const barLine = console.draft('Starting batch...');
    return barLine(
        `[${'='.repeat(units)}${' '.repeat(50 - units)}] ${progress}% - # ${i}`,
    );
};
// ----------------------------------------------------------------------------
// Fetch GitHub metadata for every repo URL, BATCH_SIZE at a time,
// sleeping DELAY ms between batches so the API does not rate-limit us.
// Returns the concatenated metadata for all repos.
async function batchFetchRepoMetadata(githubRepos) {
    const repoPaths = githubRepos.map(removeHost);
    const collected = [];
    /* eslint-disable no-await-in-loop */
    for (let offset = 0; offset < repoPaths.length; offset += BATCH_SIZE) {
        const currentBatch = repoPaths.slice(offset, offset + BATCH_SIZE);
        LOG.debug({ batch: currentBatch });
        const fetched = await fetchAll(currentBatch);
        LOG.debug('batch fetched...');
        collected.push(...fetched);
        ProgressBar(offset, BATCH_SIZE, repoPaths.length);
        // poor man's rate limiting so github doesn't ban us
        await delay(DELAY);
    }
    ProgressBar(repoPaths.length, BATCH_SIZE, repoPaths.length);
    return collected;
}
// Entry point: read the README, persist the unique repo list to
// data/repository.json, fetch metadata for each repo, then exit.
// (The original bound `extractAllLinks(markdown)` and the fetched
// metadata to locals that were never used; both dead bindings are
// removed here.)
async function main() {
    try {
        const markdown = await fs.readFile(README, 'utf8');
        const githubRepos = extractAllRepos(markdown);
        LOG.debug('writing repo list to disk...');
        await fs.outputJSON(GITHUB_REPOS, githubRepos, { spaces: 2 });
        LOG.debug('fetching data...');
        // NOTE(review): the fetched metadata is not persisted anywhere
        // in this chunk — confirm whether it should be written to disk.
        await batchFetchRepoMetadata(githubRepos);
        LOG.debug('gracefully shutting down.');
        process.exit();
    } catch (err) {
        handleFailure(err);
    }
}
main();

View File

@ -5,7 +5,8 @@
"main": "build.js", "main": "build.js",
"scripts": { "scripts": {
"build": "rimraf ./dist/ && node build.js", "build": "rimraf ./dist/ && node build.js",
"test": "node pull_request.js" "test-pr": "node tests/pull_request.js",
"test": "node tests/test_all.js"
}, },
"repository": { "repository": {
"type": "git", "type": "git",

View File

@ -1,216 +0,0 @@
// fs-extra: promise-based fs; node-fetch: HTTP; exclude list: link
// prefixes that must not be link-checked — presumably bot-hostile or
// paywalled sites, TODO confirm against exclude_in_test.json.
const fs = require('fs-extra');
const fetch = require('node-fetch');
const exclude = require('./exclude_in_test.json');

// Fail fast when a required environment variable is missing; used as a
// `||` fallback so the throw happens during module initialisation.
function envvar_undefined(variable_name) {
    throw new Error(`${variable_name} must be defined`);
}

console.log({
    DEBUG: process.env.DEBUG || false,
});

const README = 'README.md';
const GITHUB_GQL_API = 'https://api.github.com/graphql';
const TOKEN = process.env.GITHUB_TOKEN || envvar_undefined('GITHUB_TOKEN');

// Options for plain link checking: any redirect is treated as an error,
// and a browser-like user-agent is sent (some sites block obvious bots).
const LINKS_OPTIONS = {
    redirect: 'error',
    headers: {
        'Content-Type': 'application/json',
        'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    },
};

const Authorization = `token ${TOKEN}`;

// Build POST options for a GitHub GraphQL call carrying `query`.
const make_GQL_options = (query) => ({
    method: 'POST',
    headers: {
        Authorization,
        'Content-Type': 'application/json',
        'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    },
    body: JSON.stringify({ query }),
});

// Logger: the *_string variants pretty-print via JSON.stringify;
// debug output is emitted only when DEBUG env var is set.
const LOG = {
    error: (...args) => console.error('❌ ERROR', args),
    error_string: (...args) =>
        console.error('❌ ERROR', JSON.stringify({ ...args }, null, ' ')),
    debug: (...args) => {
        if (process.env.DEBUG) console.log('>>> DEBUG: ', { ...args });
    },
    debug_string: (...args) => {
        if (process.env.DEBUG)
            console.log('>>> DEBUG: ', JSON.stringify({ ...args }, null, ' '));
    },
};

// Any uncaught async failure aborts the test run with exit code 1.
const handleFailure = (error) => {
    console.error(`${error.message}: ${error.stack}`, { error });
    process.exit(1);
};
process.on('unhandledRejection', handleFailure);
// Extract every URL-looking token from the markdown.
// Returns [] instead of null when nothing matches, so callers can
// chain .filter()/.length safely.
const extract_all_links = (markdown) => {
    // if you have a problem and you try to solve it with a regex,
    // now you have two problems
    // TODO: replace this mess with a markdown parser ?
    const re = /(((https:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%/.\w\-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[.!/@\-\\\w]*))?)/g;
    return markdown.match(re) || [];
};

// Return every element that appears more than once (one entry per extra
// occurrence). Uses a Set: the original plain-object lookup (`hm[e]`)
// hit inherited keys, so e.g. 'toString' was reported as a duplicate on
// its first occurrence.
const find_duplicates = (arr) => {
    const seen = new Set();
    const dup = [];
    arr.forEach((e) => {
        if (seen.has(e)) dup.push(e);
        else seen.add(e);
    });
    return dup;
};

// Split arr into [elements matching func, elements not matching].
const partition = (arr, func) => {
    const ap = [[], []];
    arr.forEach((e) => (func(e) ? ap[0].push(e) : ap[1].push(e)));
    return ap;
};
// Probe one URL. Never throws: resolves to [url, status-info], where
// `ok` is false on any network error or non-OK response (redirects
// count as errors per LINKS_OPTIONS.redirect = 'error').
async function fetch_link(url) {
    try {
        const { ok, statusText, redirected } = await fetch(url, LINKS_OPTIONS);
        return [url, { ok, status: statusText, redirected }];
    } catch (error) {
        return [url, { ok: false, status: error.message }];
    }
}

// Run `get` over `arr` in sequential batches of BATCH_SIZE (requests
// inside a batch run in parallel). Each batch's results are optionally
// filtered through post_filter_func before being collected.
async function batch_fetch({ arr, get, post_filter_func, BATCH_SIZE = 8 }) {
    const result = [];
    /* eslint-disable no-await-in-loop */
    for (let i = 0; i < arr.length; i += BATCH_SIZE) {
        const batch = arr.slice(i, i + BATCH_SIZE);
        LOG.debug_string({ batch });
        let res = await Promise.all(batch.map(get));
        console.log(`batch fetched...${i + BATCH_SIZE}`);
        res = post_filter_func ? res.filter(post_filter_func) : res;
        LOG.debug_string({ res });
        result.push(...res);
    }
    return result;
}
// "https://github.com/owner/name" -> ["owner", "name"]; anything that is
// not exactly owner/name (deeper paths, trailing slash) is dropped.
// Uses slice() — String.prototype.substr() is deprecated.
const extract_repos = (arr) =>
    arr
        .map((e) => e.slice('https://github.com/'.length).split('/'))
        .filter((r) => r.length === 2 && r[1] !== '');

// Build one GraphQL query aliasing each repo as repo_<owner>_<name>
// ('-' and '.' are not legal in GraphQL alias names, hence the replace).
const generate_GQL_query = (arr) =>
    `query AWESOME_REPOS{ ${arr
        .map(
            ([owner, name]) =>
                `repo_${owner.replace(/(-|\.)/g, '_')}_${name.replace(
                    /(-|\.)/g,
                    '_',
                )}: repository(owner: "${owner}", name:"${name}"){ nameWithOwner } `,
        )
        .join('')} }`;
// =============================================================
// (A commented-out prototype for batching GitHub repos through the
// GraphQL API previously lived here; it was dead code — see git history.)
// =============================================================
// True when the link starts with any prefix from exclude_in_test.json.
// Array.prototype.some short-circuits on the first matching prefix,
// exactly like the original indexed loop with break.
const exclude_from_list = (link) => exclude.some((e) => link.startsWith(e));
// Run every check: duplicate links, dead external links, and GitHub
// repository existence via one GraphQL query. Exits 1 when anything
// failed; the `has_error` accumulator collects each failure category.
async function main() {
    const has_error = {
        show: false,
        duplicates: '',
        other_links_error: '',
        github_repos: '',
    };
    const markdown = await fs.readFile(README, 'utf8');
    let links = extract_all_links(markdown);
    links = links.filter((l) => !exclude_from_list(l)); // exclude websites
    LOG.debug_string({ links });
    console.log(`total links to check ${links.length}`);

    console.log('checking for duplicates links...');
    const duplicates = find_duplicates(links);
    if (duplicates.length > 0) {
        has_error.show = true;
        has_error.duplicates = duplicates;
    }
    LOG.debug_string({ duplicates });

    const [github_links, external_links] = partition(links, (link) =>
        link.startsWith('https://github.com'),
    );

    console.log(`checking ${external_links.length} external links...`);
    const external_links_error = await batch_fetch({
        arr: external_links,
        get: fetch_link,
        post_filter_func: (x) => !x[1].ok,
        BATCH_SIZE: 8,
    });
    if (external_links_error.length > 0) {
        has_error.show = true;
        has_error.other_links_error = external_links_error;
    }

    console.log(`checking ${github_links.length} GitHub repositories...`);
    const repos = extract_repos(github_links);
    const query = generate_GQL_query(repos);
    const options = make_GQL_options(query);
    const gql_response = await fetch(GITHUB_GQL_API, options).then((r) =>
        r.json(),
    );
    if (gql_response.errors) {
        has_error.show = true;
        has_error.github_repos = gql_response.errors;
    }

    console.log({
        // BUG FIX: was `has_error.show`, which printed TEST_PASSED: true
        // exactly when the run had failed.
        TEST_PASSED: !has_error.show,
        GITHUB_REPOSITORY: github_links.length,
        EXTERNAL_LINKS: external_links.length,
    });
    if (has_error.show) {
        LOG.error_string(has_error);
        process.exit(1);
    }
}

console.log('starting...');
main();

102
tests/common.js Normal file
View File

@ -0,0 +1,102 @@
// Shared helpers for the link-checking test scripts.
// node-fetch: HTTP; exclude list: link prefixes that must not be
// link-checked — TODO confirm against exclude_in_test.json.
const fetch = require('node-fetch');
const exclude = require('./exclude_in_test.json');

// Options for plain link checking: any redirect is treated as an error,
// and a browser-like user-agent is sent (some sites block obvious bots).
const LINKS_OPTIONS = {
    redirect: 'error',
    headers: {
        'Content-Type': 'application/json',
        'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    },
};

// Logger: the *_string variants pretty-print via JSON.stringify;
// debug output is emitted only when DEBUG env var is set.
const LOG = {
    error: (...args) => console.error('❌ ERROR', args),
    error_string: (...args) =>
        console.error('❌ ERROR', JSON.stringify({ ...args }, null, ' ')),
    debug: (...args) => {
        if (process.env.DEBUG) console.log('>>> DEBUG: ', { ...args });
    },
    debug_string: (...args) => {
        if (process.env.DEBUG)
            console.log('>>> DEBUG: ', JSON.stringify({ ...args }, null, ' '));
    },
};

// Any uncaught async failure aborts the test run with exit code 1.
const handleFailure = (error) => {
    console.error(`${error.message}: ${error.stack}`, { error });
    process.exit(1);
};
process.on('unhandledRejection', handleFailure);
// Extract every URL-looking token from the markdown.
// Returns [] instead of null when nothing matches, so callers can
// chain .filter()/.length safely.
const extract_all_links = (markdown) => {
    // if you have a problem and you try to solve it with a regex,
    // now you have two problems
    // TODO: replace this mess with a markdown parser ?
    const re = /(((https:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%/.\w\-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[.!/@\-\\\w]*))?)/g;
    return markdown.match(re) || [];
};

// Return every element that appears more than once (one entry per extra
// occurrence). Uses a Set: the original plain-object lookup (`hm[e]`)
// hit inherited keys, so e.g. 'toString' was reported as a duplicate on
// its first occurrence.
const find_duplicates = (arr) => {
    const seen = new Set();
    const dup = [];
    arr.forEach((e) => {
        if (seen.has(e)) dup.push(e);
        else seen.add(e);
    });
    return dup;
};

// Split arr into [elements matching func, elements not matching].
const partition = (arr, func) => {
    const ap = [[], []];
    arr.forEach((e) => (func(e) ? ap[0].push(e) : ap[1].push(e)));
    return ap;
};

// Probe one URL. Never throws: resolves to [url, status-info], where
// `ok` is false on any network error or non-OK response (redirects
// count as errors per LINKS_OPTIONS.redirect = 'error').
async function fetch_link(url) {
    try {
        const { ok, statusText, redirected } = await fetch(url, LINKS_OPTIONS);
        return [url, { ok, status: statusText, redirected }];
    } catch (error) {
        return [url, { ok: false, status: error.message }];
    }
}
async function batch_fetch({ arr, get, post_filter_func, BATCH_SIZE = 8 }) {
const result = [];
/* eslint-disable no-await-in-loop */
for (let i = 0; i < arr.length; i += BATCH_SIZE) {
const batch = arr.slice(i, i + BATCH_SIZE);
LOG.debug_string({ batch });
let res = await Promise.all(batch.map(get));
console.log(`batch fetched...${i + BATCH_SIZE}`);
res = post_filter_func ? res.filter(post_filter_func) : res;
LOG.debug_string({ res });
result.push(...res);
}
return result;
}
const exclude_length = exclude.length;
const exclude_from_list = (link) => {
let is_excluded = false;
for (let i = 0; i < exclude_length; i += 1) {
if (link.startsWith(exclude[i])) {
is_excluded = true;
break;
}
}
return is_excluded;
};
module.exports = {
LOG,
handleFailure,
extract_all_links,
find_duplicates,
partition,
fetch_link,
batch_fetch,
exclude_from_list,
};

66
tests/pull_request.js Normal file
View File

@ -0,0 +1,66 @@
const fs = require('fs-extra');
const helper = require('./common');

console.log({
    DEBUG: process.env.DEBUG || false,
});

const README = 'README.md';

/**
 * PR-scoped checks: duplicate links and dead external links only.
 * The GitHub repository existence check is deliberately skipped here
 * (it needs a token; `npm run test` covers it).
 * Exits with code 1 when any check fails.
 */
async function main() {
    const failures = {
        show: false,
        duplicates: '',
        other_links_error: '',
    };

    const markdown = await fs.readFile(README, 'utf8');
    const all_links = helper.extract_all_links(markdown);
    const links = all_links.filter((l) => !helper.exclude_from_list(l)); // exclude websites
    helper.LOG.debug_string({ links });
    console.log(`total links to check ${links.length}`);

    console.log('checking for duplicates links...');
    const duplicates = helper.find_duplicates(links);
    if (duplicates.length > 0) {
        failures.show = true;
        failures.duplicates = duplicates;
    }
    helper.LOG.debug_string({ duplicates });

    const [github_links, external_links] = helper.partition(links, (link) =>
        link.startsWith('https://github.com'),
    );

    console.log(`checking ${external_links.length} external links...`);
    const external_links_error = await helper.batch_fetch({
        arr: external_links,
        get: helper.fetch_link,
        post_filter_func: (x) => !x[1].ok,
        BATCH_SIZE: 8,
    });
    if (external_links_error.length > 0) {
        failures.show = true;
        failures.other_links_error = external_links_error;
    }

    console.log(`checking ${github_links.length} GitHub repositories...`);
    console.log(
        `skipping GitHub repository check. Run "npm run test" to execute them manually.`,
    );

    console.log({
        TEST_PASSED: !failures.show,
        EXTERNAL_LINKS: external_links.length,
    });
    if (failures.show) {
        helper.LOG.error_string(failures);
        process.exit(1);
    }
}

console.log('starting...');
main();

111
tests/test_all.js Normal file
View File

@ -0,0 +1,111 @@
const fs = require('fs-extra');
const fetch = require('node-fetch');
const helper = require('./common');

// Fail fast when a required environment variable is missing; used as a
// `||` fallback so the throw happens during module initialisation.
function envvar_undefined(variable_name) {
    throw new Error(`${variable_name} must be defined`);
}

console.log({
    DEBUG: process.env.DEBUG || false,
});

const README = 'README.md';
const GITHUB_GQL_API = 'https://api.github.com/graphql';
const TOKEN = process.env.GITHUB_TOKEN || envvar_undefined('GITHUB_TOKEN');

const Authorization = `token ${TOKEN}`;

// Build POST options for a GitHub GraphQL call carrying `query`.
const make_GQL_options = (query) => ({
    method: 'POST',
    headers: {
        Authorization,
        'Content-Type': 'application/json',
        'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    },
    body: JSON.stringify({ query }),
});

// "https://github.com/owner/name" -> ["owner", "name"]; anything that is
// not exactly owner/name (deeper paths, trailing slash) is dropped.
// Uses slice() — String.prototype.substr() is deprecated.
const extract_repos = (arr) =>
    arr
        .map((e) => e.slice('https://github.com/'.length).split('/'))
        .filter((r) => r.length === 2 && r[1] !== '');

// Build one GraphQL query aliasing each repo as repo_<owner>_<name>
// ('-' and '.' are not legal in GraphQL alias names, hence the replace).
const generate_GQL_query = (arr) =>
    `query AWESOME_REPOS{ ${arr
        .map(
            ([owner, name]) =>
                `repo_${owner.replace(/(-|\.)/g, '_')}_${name.replace(
                    /(-|\.)/g,
                    '_',
                )}: repository(owner: "${owner}", name:"${name}"){ nameWithOwner } `,
        )
        .join('')} }`;
// Full test: duplicate links, dead external links, and GitHub
// repository existence via one GraphQL query. Exits 1 when anything
// failed; `has_error` collects each failure category.
async function main() {
    const has_error = {
        show: false,
        duplicates: '',
        other_links_error: '',
        github_repos: '',
    };
    const markdown = await fs.readFile(README, 'utf8');
    let links = helper.extract_all_links(markdown);
    links = links.filter((l) => !helper.exclude_from_list(l)); // exclude websites
    helper.LOG.debug_string({ links });
    console.log(`total links to check ${links.length}`);

    console.log('checking for duplicates links...');
    const duplicates = helper.find_duplicates(links);
    if (duplicates.length > 0) {
        has_error.show = true;
        has_error.duplicates = duplicates;
    }
    helper.LOG.debug_string({ duplicates });

    const [github_links, external_links] = helper.partition(links, (link) =>
        link.startsWith('https://github.com'),
    );

    console.log(`checking ${external_links.length} external links...`);
    const external_links_error = await helper.batch_fetch({
        arr: external_links,
        get: helper.fetch_link,
        post_filter_func: (x) => !x[1].ok,
        BATCH_SIZE: 8,
    });
    if (external_links_error.length > 0) {
        has_error.show = true;
        has_error.other_links_error = external_links_error;
    }

    console.log(`checking ${github_links.length} GitHub repositories...`);
    const repos = extract_repos(github_links);
    const query = generate_GQL_query(repos);
    const options = make_GQL_options(query);
    const gql_response = await fetch(GITHUB_GQL_API, options).then((r) =>
        r.json(),
    );
    if (gql_response.errors) {
        has_error.show = true;
        has_error.github_repos = gql_response.errors;
    }

    console.log({
        // BUG FIX: was `has_error.show`, which printed TEST_PASSED: true
        // exactly when the run had failed.
        TEST_PASSED: !has_error.show,
        GITHUB_REPOSITORY: github_links.length,
        EXTERNAL_LINKS: external_links.length,
    });
    if (has_error.show) {
        helper.LOG.error_string(has_error);
        process.exit(1);
    }
}

console.log('starting...');
main();