mirror of
https://github.com/briatte/awesome-network-analysis.git
synced 2025-01-05 12:30:53 -05:00
R script to check URLs
This commit is contained in:
parent
52b0f27930
commit
3fef0b5ce4
54
check.r
Normal file
54
check.r
Normal file
@ -0,0 +1,54 @@
|
||||
library(httr)
|
||||
library(stringr)
|
||||
|
||||
# Download the raw README of the list and split it into lines.
#
# The result, `u`, is a character vector with one element per line of the
# README; working line by line is what lets reference-style links
# ("[foo]: bar") be matched with a "^" anchor later on.
readme_url <- str_c(
  "https://raw.githubusercontent.com/",
  "briatte/awesome-network-analysis/",
  "master/README.md"
)

readme <- GET(readme_url)

# Abort if the download itself failed: otherwise the body of an error page
# would be parsed as if it were the README, and the script would report
# zero links without any sign that something went wrong.
stop_for_status(readme)

u <- readme %>%
  content("text", encoding = "UTF-8") %>%
  str_split("\\n") %>% # so as to find [foo]: bar links
  unlist
|
||||
|
||||
# Tally every occurrence of "http" over all README lines and print the total.
# The count is kept in `t` for later use.
t <- sum(str_count(u, "http"))

cat(t, "URLs, ")
|
||||
|
||||
# Collect the target of every link found in the README lines, covering both
# Markdown notations, then check the tally and de-duplicate.
inline_pattern <- "\\(http(.*?)\\)"
reference_pattern <- "^\\[(.*)\\]: (.*)"

# inline links, [foo](bar): grab "(http...)" and strip the parentheses
inline_links <- unlist(
  lapply(
    str_extract_all(u, inline_pattern),
    str_replace_all, "^\\(|\\)$", ""
  )
)

# reference links, [foo]: bar: keep only what follows the "[foo]: " label
reference_links <- unlist(str_extract_all(u, reference_pattern))
reference_links <- str_replace(reference_links, reference_pattern, "\\2")

l <- c(inline_links, reference_links)

# sanity check: the number of extracted links must equal the "http" count
stopifnot(length(l) == t)

l <- unique(l)
cat(length(l), "unique\n")
|
||||
|
||||
# Request every unique URL in turn and report the ones that do not answer
# with HTTP 200. A dot is printed for each URL that does, so that progress
# stays visible on long runs.
n <- length(l)

for (k in seq_along(l)) {

  i <- l[[k]]

  # progress report every 25 URLs; iterating by index avoids searching the
  # whole vector with which() to find the current position
  if (k %% 25 == 0) {
    cat(n - k, "left\n")
  }

  # try() turns a request error (unknown host, refused connection, ...)
  # into a "try-error" object instead of aborting the whole run; the
  # timeout makes an unresponsive server count as a failure rather than
  # blocking the script indefinitely
  x <- try(status_code(GET(i, timeout(30))), silent = TRUE)

  if (inherits(x, "try-error")) {
    cat("\nURL:", i, "\nFailed to access\n.")
  } else if (x != 200) {
    cat("\nURL:", i, "\nStatus code:", x, "\n")
  } else {
    cat(".")
  }

}
|
Loading…
Reference in New Issue
Block a user