URL checks

This commit is contained in:
François Briatte 2017-03-14 09:40:30 +01:00
parent 2d41719b03
commit 174a742605
2 changed files with 40 additions and 44 deletions

View File

@ -1,36 +1,36 @@
2017-01-02 19:46:05 : checking 11 URLs 2017-03-14 09:04:07 : checking 11 URLs
http://derekgreene.com/gephitutorial/ http://asonam.cpsc.ucalgary.ca/
Status code: 404 Status code: 400
http://comnet.oxfordjournals.org/
Failed to access
http://comnet.oxfordjournals.org/content/4/3/457.short
Failed to access
http://netzwerkerei.org/
Status code: 403
http://nexus.igraph.org/ http://nexus.igraph.org/
Failed to access Failed to access
http://www.clementlevallois.net/gephi.html
Status code: 404
http://www.cmh.pro.ens.fr/reseaux-sociaux/
Failed to access
http://www.derekruths.com/ http://www.derekruths.com/
Status code: 403 Status code: 403
http://www.esri.com/software/arcgis/extensions/networkanalyst
Status code: 403
http://www.melissaclarkson.com/resources/R_guides/
Status code: 500
http://www.nature.com/nature/journal/v445/n7127/full/445489a.html http://www.nature.com/nature/journal/v445/n7127/full/445489a.html
Status code: 401 Status code: 401
http://www.ssc.wisc.edu/~emirbaye/Mustafa_Emirbayer/ARTICLES_files/manifesto%20for%20a%20relational%20sociology.pdf 2017-03-14 09:08:15 : done.
Status code: 404
http://www.ssc.wisc.edu/~emirbaye/Mustafa_Emirbayer/ARTICLES_files/network%20analysis,%20culture,%20and%20the%20problem%20of%20agency.pdf
Status code: 404
https://410f84824e101297359cc81c78f45c7c079eb26c.googledrive.com/host/0Bz6WHrWac3FrWnA5MjZqb3lWa2c/
Status code: 404
https://dlist.server.uni-frankfurt.de/mailman/listinfo/sna-de
Status code: 400
https://lra.le.ac.uk/bitstream/2381/36068/2/Draft%20BJM%20Revised%20(3rd%20iteration)%20Manuscript.pdf
Status code: 501
https://www.linkedin.com/in/mcculloh
Status code: 999
https://www.puf.com/content/R%C3%A9seaux_sociaux_et_structures_relationnelles
Status code: 501
2017-01-02 19:46:09 : done.

22
check.r
View File

@ -28,8 +28,7 @@ if (!file.exists(f)) {
unlist unlist
# total number of links # total number of links
t <- str_count(u, "http") %>% t <- sum(str_count(u, "http"))
sum
cat(t, "URLs, ") cat(t, "URLs, ")
@ -50,15 +49,13 @@ if (!file.exists(f)) {
cat("Source:", f, "\n") cat("Source:", f, "\n")
l <- readLines(f) %>% l <- str_subset(readLines(f), "^http")
str_subset("^http")
cat(length(l), "URLs, ") cat(length(l), "URLs, ")
} }
l <- unique(l) %>% l <- sort(unique(l))
sort
cat(length(l), "unique\n") cat(length(l), "unique\n")
@ -68,9 +65,7 @@ sink()
for (i in l) { for (i in l) {
x <- try(GET(i) %>% x <- try(status_code(GET(i)), silent = TRUE)
status_code,
silent = TRUE)
if (!"try-error" %in% class(x) && x != 200) { if (!"try-error" %in% class(x) && x != 200) {
@ -94,15 +89,16 @@ for (i in l) {
} }
if (!which(l == i) %% 50) if (!which(l == i) %% 50) {
cat("", length(l) - which(l == i), "left\n") cat("", length(l) - which(l == i), "left\n")
} }
}
sink(f, append = TRUE) sink(f, append = TRUE)
cat(as.character(Sys.time()), ": done.\n") cat(as.character(Sys.time()), ": done.\n")
sink() sink()
cat("\nFound", str_count(readLines(f), "^http") %>% cat("\nFound", sum(str_count(readLines(f), "^http")), "problems.\n")
sum,
"problems.\n")