Rewrite parser for new instance list fmt

This also contains a breaking change to the API:
  - The flag and region values have been moved. They're now within the
    country dict in the JSON response
This commit is contained in:
syeopite 2021-05-30 11:33:40 -07:00
parent 67a99e2c3b
commit af70b50041
No known key found for this signature in database
GPG Key ID: 6FA616E5A5294A82
3 changed files with 138 additions and 56 deletions

118
src/fetch.cr Normal file
View File

@ -0,0 +1,118 @@
# Builds the country record for one instance row.
#
# `md` is the row match (or anything indexable with `["flag"]?` and
# `["country_name"]?`). Returns `{flag:, region:, name:}`; every field is nil
# when the corresponding capture is missing.
def fetch_country(md)
  flag = md["flag"]?
  country_name = md["country_name"]?

  # Shifting each code point down by 0x1f1a5 turns the regional-indicator
  # symbols that make up a flag emoji into ASCII letters
  # (U+1F1E6 - 0x1F1A5 = 0x41 = 'A'), e.g. "🇩🇪" becomes "DE".
  region = flag.try do |emoji|
    emoji.chars.map { |symbol| (symbol.ord - 0x1f1a5).chr }.join
  end

  {flag: flag, region: region, name: country_name}
end
# Extracts the free-text notes cell of an instance row.
#
# The notes capture is the remainder of the table line, so it can carry the
# closing `|` of the row as well as padding around it. Pipes and whitespace
# are trimmed together from both ends: trimming only pipes would leave
# `"| "` as a blank (non-nil) note and would not remove the pipe at all when
# whitespace follows it.
#
# Returns the trimmed notes, or nil when nothing is left.
def fetch_notes(md)
  md["notes"].strip { |char| char == '|' || char.whitespace? }.presence
end
# Builds the record for a clearnet instance from one matched table row.
#
# * `md`        - row match exposing the named captures "uri", "host",
#                 "ddos_protection", "owner", "owner_url" and, optionally,
#                 "status_url" and "privacy_policy" (plus whatever
#                 `fetch_country` / `fetch_notes` read).
# * `instances` - host => record hash; used only as a fallback source for the
#                 monitor entry of this host.
# * `monitors`  - monitor entries (each indexable by "name"), or nil.
#
# Performs a GET on `/api/v1/stats` of the instance with 5 second connect and
# read timeouts. Any failure (network error, invalid JSON, ...) results in
# `stats: nil` rather than an exception.
def prepare_http_instance(md, instances, monitors)
  uri = URI.parse(md["uri"])
  host = md["host"]

  country = fetch_country(md)
  status_url = md["status_url"]?
  privacy_policy = md["privacy_policy"]?

  # The list spells "no protection" as the literal text "None".
  ddos_protection = md["ddos_protection"].strip
  ddos_protection = nil if ddos_protection == "None"

  owner = {name: md["owner"].strip("@"), url: md["owner_url"]}
  notes = fetch_notes(md)

  client = HTTP::Client.new(uri)
  client.connect_timeout = 5.seconds
  client.read_timeout = 5.seconds
  begin
    stats = JSON.parse(client.get("/api/v1/stats").body)
  rescue
    # Best effort: an unreachable or misbehaving instance simply has no stats.
    stats = nil
  ensure
    # Release the connection; otherwise one socket per instance stays open
    # after every refresh.
    client.close
  end

  monitor = monitors.try &.find { |entry| entry["name"].try &.as_s == host }

  {country: country, stats: stats, type: "https", uri: uri.to_s, status_url: status_url,
   privacy_policy: privacy_policy, ddos_protection: ddos_protection,
   owner: owner, notes: notes,
   # Fall back to the monitor already stored for this host, if any.
   monitor: monitor || instances[host]?.try &.[:monitor]?}
end
# Builds the record for an onion instance from one matched table row.
#
# * `md`        - row match exposing the named captures "uri", "host",
#                 "clearnet_url", "owner", "owner_url" and, optionally,
#                 "privacy_policy" (plus whatever `fetch_country` /
#                 `fetch_notes` read).
# * `instances` - unused here; kept so the signature parallels
#                 `prepare_http_instance`.
#
# When `CONFIG["fetch_onion_instance_stats"]?` is truthy, the stats are
# fetched by running `curl` through the configured SOCKS5 proxy and parsing
# the first line of its output as JSON. Any failure results in `stats: nil`.
# The monitor field is always nil for onion instances.
def prepare_onion_instance(md, instances)
  uri = URI.parse(md["uri"])
  clearnet_url = md["clearnet_url"]
  country = fetch_country(md)
  privacy_policy = md["privacy_policy"]?
  owner = {name: md["owner"].strip("@"), url: md["owner_url"]}
  notes = fetch_notes(md)

  # NOTE(review): if CONFIG is a YAML::Any, an explicit `false` value is still
  # truthy here — confirm against load_config.
  if CONFIG["fetch_onion_instance_stats"]?
    begin
      # Pass the arguments as an explicit list instead of building a quoted
      # string and re-splitting it: the host comes from a remotely fetched
      # document, and a stray quote or space in it must not be able to turn
      # into additional curl options.
      args = [
        "--socks5-hostname",
        "#{CONFIG["tor_sock_proxy_address"]}:#{CONFIG["tor_sock_proxy_port"]}",
        "http://#{uri.host}/api/v1/stats",
      ]
      response = nil
      Process.run("curl", args: args) do |result|
        response = JSON.parse(result.output.read_line)
      end
      stats = response
    rescue
      # Best effort: missing config keys, curl failures, empty output and
      # invalid JSON all mean "no stats".
      stats = nil
    end
  else
    stats = nil
  end

  {country: country, stats: stats, type: "onion", uri: uri.to_s, clearnet_url: clearnet_url,
   privacy_policy: privacy_policy, owner: owner, notes: notes,
   monitor: nil}
end
# Scans the markdown `body` for rows of the clearnet instance table and stores
# one record per row in `instances`, keyed by the host shown in the address
# column. `monitors` is handed through to `prepare_http_instance`.
def get_clearnet_instances(body, instances, monitors)
  # There is no Crystal markdown parser with table support, so every column is
  # described by its own pattern, listed here in table order.
  column_patterns = [
    /\[(?<host>[^ \]]+)\]\((?<uri>[^\)]+)\)/,                   # address
    /(?<flag>[\x{1f100}-\x{1f1ff}]{2}) (?<country_name>[^ ]+)/, # country
    /((\[[^\]]+\]\(.*\){1}\])\((?<status_url>.*)\)|(None))/,    # status
    /((\[[^ \]]+\]\((?<privacy_policy>[^\)]+)\))|(None))/,      # privacy policy
    /(?<ddos_protection>.*)/,                                   # DDOS protection
    /\[(?<owner>[^ \]]+)\]\((?<owner_url>[^\)]+)\)/,            # owner
    /(?<notes>.*)/,                                             # notes
  ]

  # Consecutive columns are separated by a pipe padded with spaces.
  row_pattern = /#{column_patterns.join(/ +\| +/)}/mx

  rows = body.scan(row_pattern)
  rows.each do |row|
    instances[row["host"]] = prepare_http_instance(row, instances, monitors)
  end
end
# Scans the markdown `body` for rows of the onion instance table and stores
# one record per row in `instances`, keyed by the host shown in the address
# column.
def get_onion_instances(body, instances)
  # There is no Crystal markdown parser with table support, so every column is
  # described by its own pattern, listed here in table order.
  column_patterns = [
    /\[(?<host>[^ \]]+)\]\((?<uri>[^\)]+)\)/,                   # address
    /(?<flag>[\x{1f100}-\x{1f1ff}]{2}) (?<country_name>[^ ]+)/, # country
    /\[(?<clearnet_host>[^ \]]+)\]\((?<clearnet_url>[^\)]+)\)/, # clearnet instance
    /((\[[^ \]]+\]\((?<privacy_policy>[^\)]+)\))|(None))/,      # privacy policy
    /\[(?<owner>[^ \]]+)\]\((?<owner_url>[^\)]+)\)/,            # owner
    /(?<notes>.*)/,                                             # notes
  ]

  # Consecutive columns are separated by a pipe padded with spaces.
  row_pattern = /#{column_patterns.join(/ +\| +/)}/mx

  rows = body.scan(row_pattern)
  rows.each do |row|
    instances[row["host"]] = prepare_onion_instance(row, instances)
  end
end

View File

@ -18,7 +18,8 @@ require "http/client"
require "kemal"
require "uri"
require "./helpers/*"
require "./fetch.cr"
require "./helpers/helpers.cr"
CONFIG = load_config()
@ -28,9 +29,12 @@ macro rendered(filename)
render "src/instances/views/#{{{filename}}}.ecr"
end
# Flat instance record with flag and region at the top level.
alias Instance = NamedTuple(flag: String?, region: String?, stats: JSON::Any?, type: String, uri: String, monitor: JSON::Any?)
# Owner of an instance: a name and a URL.
alias Owner = NamedTuple(name: String, url: String)
# Country of an instance; flag, region and name are all optional.
alias Country = NamedTuple(flag: String?, region: String?, name: String?)
# Record for a clearnet instance, with nested country and owner records.
alias ClearNetInstance = NamedTuple(country: Country, stats: JSON::Any?, type: String, uri: String, status_url: String?, privacy_policy: String?, ddos_protection: String?, owner: Owner, notes: String?, monitor: JSON::Any?)
# Record for an onion instance; has a clearnet_url instead of status_url/ddos_protection.
alias OnionInstance = NamedTuple(country: Country, stats: JSON::Any?, type: String, uri: String, clearnet_url: String?, privacy_policy: String?, owner: Owner, notes: String?, monitor: JSON::Any?)
# Instance records keyed by a String.
INSTANCES = {} of String => Instance
INSTANCES = {} of String => ClearNetInstance | OnionInstance
spawn do
loop do
@ -55,55 +59,15 @@ spawn do
end
end
begin
body = HTTP::Client.get(URI.parse("https://raw.githubusercontent.com/iv-org/documentation/master/Invidious-Instances.md")).body
# Needs to be replaced once merged!
body = HTTP::Client.get(URI.parse("https://raw.githubusercontent.com/TheFrenchGhosty/documentation/instances-list-rewrite/Public-Instances.md")).body
rescue ex
body = ""
end
instances = {} of String => Instance
body = body.split("### Blocked:")[0]
body.scan(/\[(?<host>[^ \]]+)\]\((?<uri>[^\)]+)\)( .(?<region>[\x{1f100}-\x{1f1ff}]{2}))?/mx).each do |md|
region = md["region"]?.try { |region| region.codepoints.map { |codepoint| (codepoint - 0x1f1a5).chr }.join("") }
flag = md["region"]?
uri = URI.parse(md["uri"])
host = md["host"]
case type = host.split(".")[-1]
when "onion"
type = "onion"
if CONFIG["fetch_onion_instance_stats"]?
begin
args = Process.parse_arguments("--socks5-hostname '#{CONFIG["tor_sock_proxy_address"]}:#{CONFIG["tor_sock_proxy_port"]}' 'http://#{uri.host}/api/v1/stats'")
response = nil
Process.run("curl", args: args) do |result|
data = result.output.read_line
response = JSON.parse(data)
end
stats = response
rescue ex
stats = nil
end
end
when "i2p"
else
type = uri.scheme.not_nil!
client = HTTP::Client.new(uri)
client.connect_timeout = 5.seconds
client.read_timeout = 5.seconds
begin
stats = JSON.parse(client.get("/api/v1/stats").body)
rescue ex
stats = nil
end
end
monitor = monitors.try &.select { |monitor| monitor["name"].try &.as_s == host }[0]?
instances[host] = {flag: flag, region: region, stats: stats, type: type, uri: uri.to_s, monitor: monitor || instances[host]?.try &.[:monitor]?}
end
instances = {} of String => ClearNetInstance | OnionInstance
get_clearnet_instances(body, instances, monitors)
get_onion_instances(body, instances)
INSTANCES.clear
INSTANCES.merge! instances
@ -154,13 +118,13 @@ static_headers do |response, filepath, filestat|
end
# Maps a sort key to a proc that takes an instance's name and record and
# returns the value to order by. Numeric values are negated and missing data
# falls back to a fixed default ("ZZ", 2, 0, 0.0 or [0, 0, 0]).
# NOTE(review): presumably consumed by sort_instances with an ascending sort,
# which would make the negation put the largest values first — confirm.
SORT_PROCS = {
"health" => ->(name : String, instance : Instance) { -(instance[:monitor]?.try &.["30dRatio"]["ratio"].as_s.to_f || 0.0) },
"location" => ->(name : String, instance : Instance) { instance[:region]? || "ZZ" },
"name" => ->(name : String, instance : Instance) { name },
"signup" => ->(name : String, instance : Instance) { instance[:stats]?.try &.["openRegistrations"]?.try { |bool| bool.as_bool ? 0 : 1 } || 2 },
"type" => ->(name : String, instance : Instance) { instance[:type] },
"users" => ->(name : String, instance : Instance) { -(instance[:stats]?.try &.["usage"]?.try &.["users"]["total"].as_i || 0) },
"version" => ->(name : String, instance : Instance) { instance[:stats]?.try &.["software"]?.try &.["version"].as_s.try &.split("-", 2)[0].split(".").map { |a| -a.to_i } || [0, 0, 0] },
# 30-day uptime ratio from the monitor entry, parsed as a float.
"health" => ->(name : String, instance : ClearNetInstance | OnionInstance) { -(instance[:monitor]?.try &.["30dRatio"]["ratio"].as_s.to_f || 0.0) },
# Region code, read from the nested country record.
"location" => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:country][:region]? || "ZZ" },
"name" => ->(name : String, instance : ClearNetInstance | OnionInstance) { name },
# 0 when registrations are open, 1 when closed, 2 when unknown.
"signup" => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:stats]?.try &.["openRegistrations"]?.try { |bool| bool.as_bool ? 0 : 1 } || 2 },
"type" => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:type] },
"users" => ->(name : String, instance : ClearNetInstance | OnionInstance) { -(instance[:stats]?.try &.["usage"]?.try &.["users"]["total"].as_i || 0) },
# Version text before the first "-", split on "." into negated integers.
"version" => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:stats]?.try &.["software"]?.try &.["version"].as_s.try &.split("-", 2)[0].split(".").map { |a| -a.to_i } || [0, 0, 0] },
}
def sort_instances(instances, sort_by)

View File

@ -118,7 +118,7 @@
<td><%= instance[:type] %></td>
<td><%= instance[:stats]?.try &.["usage"]?.try &.["users"]["total"] || "-" %></td>
<td><%= instance[:stats]?.try &.["openRegistrations"]?.try { |bool| bool.as_bool ? "✔" : "❌" } || "-" %></td>
<td><%= instance[:flag]? ? "#{instance[:flag]} #{instance[:region]}" : "-" %></td>
<td><%= instance[:country]? ? "#{instance[:country][:flag]} #{instance[:country][:name]}" : "-" %></td>
<td><%= instance[:monitor]?.try &.["30dRatio"]["ratio"] || "-" %></td>
</tr>
<% end %>