From 997449ab4b11124d11e638294ea35a4018343148 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Fri, 16 Mar 2018 19:36:49 -0500 Subject: [PATCH] Add language detection for top videos --- shard.yml | 3 +++ src/helpers.cr | 31 +++++++++++++++++++++++++++---- src/invidious.cr | 9 ++++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/shard.yml b/shard.yml index f9017d62..dd66fd35 100644 --- a/shard.yml +++ b/shard.yml @@ -15,6 +15,9 @@ dependencies: pg: github: will/crystal-pg branch: master + detect_language: + github: omarroth/detectlanguage-crystal + branch: v0.24.2 crystal: 0.24.2 diff --git a/src/helpers.cr b/src/helpers.cr index f6b1e52b..c54dfb22 100644 --- a/src/helpers.cr +++ b/src/helpers.cr @@ -24,7 +24,8 @@ class Config port: Int32, dbname: String, ), - redirect: Bool, + redirect: Bool, + dl_api_key: String | Nil, }) end @@ -229,7 +230,7 @@ def decrypt_signature(a) return a.join("") end -def rank_videos(db, n) +def rank_videos(db, n, pool, filter) top = [] of {Float64, String} db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 10000") do |rs| @@ -250,8 +251,30 @@ def rank_videos(db, n) top.reverse! 
top = top.map { |a, b| b } - # Return top - return top[0..n - 1] + if filter + language_list = [] of String + top.each do |id| + if language_list.size == n + break + else + client = get_client(pool) + video = get_video(id, client, db) + pool << client + + description = XML.parse(video.description) + content = [video.title, description.content].join(" ") + + results = DetectLanguage.detect(content) + + if results[0].language == "en" + language_list << id + end + end + end + return language_list + else + return top[0..n - 1] + end end def make_client(url) diff --git a/src/invidious.cr b/src/invidious.cr index 6796008f..f373be42 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -14,6 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +require "detect_language" require "kemal" require "option_parser" require "pg" @@ -135,8 +136,14 @@ end top_videos = [] of Video spawn do + if CONFIG.dl_api_key + DetectLanguage.configure do |config| + config.api_key = CONFIG.dl_api_key.not_nil! + end + end + loop do - top = rank_videos(PG_DB, 40) + top = rank_videos(PG_DB, 40, youtube_pool, true) if top.size > 0 args = arg_array(top)