mirror of
https://github.com/iv-org/invidious.git
synced 2024-10-01 01:35:38 -04:00
commit
3c6a662aaf
@ -51,6 +51,12 @@ module Invidious::Routes::Search
|
|||||||
else
|
else
|
||||||
user = env.get? "user"
|
user = env.get? "user"
|
||||||
|
|
||||||
|
# A URL was copy/pasted in the search box.
|
||||||
|
# Redirect the user to the appropriate page.
|
||||||
|
if query.is_url?
|
||||||
|
return env.redirect UrlSanitizer.process(query.text).to_s
|
||||||
|
end
|
||||||
|
|
||||||
begin
|
begin
|
||||||
items = query.process
|
items = query.process
|
||||||
rescue ex : ChannelSearchException
|
rescue ex : ChannelSearchException
|
||||||
|
@ -20,6 +20,9 @@ module Invidious::Search
|
|||||||
property region : String?
|
property region : String?
|
||||||
property channel : String = ""
|
property channel : String = ""
|
||||||
|
|
||||||
|
# Flag that indicates if the smart search features have been disabled.
|
||||||
|
@inhibit_ssf : Bool = false
|
||||||
|
|
||||||
# Return true if @raw_query is either `nil` or empty
|
# Return true if @raw_query is either `nil` or empty
|
||||||
private def empty_raw_query?
|
private def empty_raw_query?
|
||||||
return @raw_query.empty?
|
return @raw_query.empty?
|
||||||
@ -48,10 +51,18 @@ module Invidious::Search
|
|||||||
)
|
)
|
||||||
# Get the raw search query string (common to all search types). In
|
# Get the raw search query string (common to all search types). In
|
||||||
# Regular search mode, also look for the `search_query` URL parameter
|
# Regular search mode, also look for the `search_query` URL parameter
|
||||||
if @type.regular?
|
_raw_query = params["q"]?
|
||||||
@raw_query = params["q"]? || params["search_query"]? || ""
|
_raw_query ||= params["search_query"]? if @type.regular?
|
||||||
else
|
_raw_query ||= ""
|
||||||
@raw_query = params["q"]? || ""
|
|
||||||
|
# Remove surrounding whitespace. Mostly useful for copy/pasted URLs.
|
||||||
|
@raw_query = _raw_query.strip
|
||||||
|
|
||||||
|
# Check for smart features (ex: URL search) inhibitor (backslash).
|
||||||
|
# If inhibitor is present, remove it.
|
||||||
|
if @raw_query.starts_with?('\\')
|
||||||
|
@inhibit_ssf = true
|
||||||
|
@raw_query = @raw_query[1..]
|
||||||
end
|
end
|
||||||
|
|
||||||
# Get the page number (also common to all search types)
|
# Get the page number (also common to all search types)
|
||||||
@ -85,7 +96,7 @@ module Invidious::Search
|
|||||||
@filters = Filters.from_iv_params(params)
|
@filters = Filters.from_iv_params(params)
|
||||||
@channel = params["channel"]? || ""
|
@channel = params["channel"]? || ""
|
||||||
|
|
||||||
if @filters.default? && @raw_query.includes?(':')
|
if @filters.default? && @raw_query.index(/\w:\w/)
|
||||||
# Parse legacy filters from query
|
# Parse legacy filters from query
|
||||||
@filters, @channel, @query, subs = Filters.from_legacy_filters(@raw_query)
|
@filters, @channel, @query, subs = Filters.from_legacy_filters(@raw_query)
|
||||||
else
|
else
|
||||||
@ -136,5 +147,22 @@ module Invidious::Search
|
|||||||
|
|
||||||
return params
|
return params
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Tells whether the query is a standalone YouTube URL.
#
# Returns `false` as soon as one of the following holds (checked in this
# order): the smart search features were inhibited, the search is not a
# regular one, or some search filters are set. Otherwise the answer is
# given by a simple heuristic on the beginning of the raw query.
def is_url? : Bool
  # Inhibited smart features, non-regular search mode and the presence
  # of filters all mean "this is a plain search".
  return false if @inhibit_ssf || !@type.regular? || !@filters.default?

  # Simple heuristics: the raw query has to start with a YouTube domain
  # name (optional scheme, optional "www." and "m." prefixes).
  youtube_prefix = /(https?:\/\/)?(www\.)?(m\.)?youtu(\.be|be\.com)\//

  return @raw_query.starts_with?(youtube_prefix)
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
121
src/invidious/yt_backend/url_sanitizer.cr
Normal file
121
src/invidious/yt_backend/url_sanitizer.cr
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
require "uri"
|
||||||
|
|
||||||
|
# Turns user-supplied YouTube URLs into a host-less URI (path + query only)
# made of a cleaned-up path and an allow-listed set of query parameters.
module UrlSanitizer
  extend self

  # Query parameters that are kept, grouped by the kind of page the URL
  # points to (see `determine_allowed`). Anything else is dropped.
  ALLOWED_QUERY_PARAMS = {
    channel:  ["u", "user", "lb"],
    playlist: ["list"],
    search:   ["q", "search_query", "sp"],
    watch:    [
      "v",                                  # Video ID
      "list", "index",                      # Playlist-related
      "playlist",                           # Unnamed playlist (id,id,id,...) (embed-only?)
      "t", "time_continue", "start", "end", # Timestamp
      "lc",                                 # Highlighted comment (watch page only)
    ],
  }

  # Returns whether the given string is an ASCII word. This is the same as
  # running the following regex in US-ASCII locale: /^[\w-]+$/
  private def ascii_word?(str : String) : Bool
    # A string containing multi-byte characters can't be pure ASCII
    return false if str.bytesize != str.size

    str.each_byte do |byte|
      next if 'a'.ord <= byte <= 'z'.ord
      next if 'A'.ord <= byte <= 'Z'.ord
      next if '0'.ord <= byte <= '9'.ord
      next if byte == '-'.ord || byte == '_'.ord

      return false
    end

    return true
  end

  # Return which kind of parameters are allowed based on the
  # first path component (breadcrumb 0). The returned symbol is a key
  # of `ALLOWED_QUERY_PARAMS`; `nil` means that no parameter is allowed.
  private def determine_allowed(path_root : String)
    case path_root
    when "watch", "w", "v", "embed", "e", "shorts", "clip"
      return :watch
    when .starts_with?("@"), "c", "channel", "user", "profile", "attribution_link"
      return :channel
    when "playlist", "mix"
      return :playlist
    when "results", "search"
      return :search
    else # hashtag, post, trending, brand URLs, etc..
      return nil
    end
  end

  # Create a new URI::Param containing only the allowed parameters.
  # *allowed_type* is a key of `ALLOWED_QUERY_PARAMS`.
  private def copy_params(unsafe_params : URI::Params, allowed_type) : URI::Params
    new_params = URI::Params.new

    ALLOWED_QUERY_PARAMS[allowed_type].each do |name|
      if unsafe_params[name]?
        # Only copy the last parameter, in case there is more than one
        new_params[name] = unsafe_params.fetch_all(name)[-1]
      end
    end

    return new_params
  end

  # Transform any user-supplied youtube URL into something we can trust
  # and use across the code.
  #
  # The returned URI only ever carries a path and (optionally) query
  # parameters; the scheme and host of the input are never copied over.
  # Bogus input, as well as hosts that are not handled below, yield "/".
  def process(str : String) : URI
    # Because URI follows RFC3986 specifications, URL without a scheme
    # will be parsed as a relative path. So we have to add a scheme ourselves.
    str = "https://#{str}" if !str.starts_with?(/https?:\/\//)

    unsafe_uri = URI.parse(str)
    unsafe_host = unsafe_uri.host
    unsafe_path = unsafe_uri.path

    new_uri = URI.new(path: "/")

    # Redirect to homepage for bogus URLs
    return new_uri if (unsafe_host.nil? || unsafe_path.nil?)

    breadcrumbs = unsafe_path
      .split('/', remove_empty: true)
      .compact_map do |bc|
        # Exclude attempts at path traversal
        next if bc == "." || bc == ".."

        # Non-alnum characters are unlikely in a genuine URL
        next if !ascii_word?(bc)

        bc
      end

    # If nothing remains, it's either a legit URL to the homepage
    # (who does that!?) or because we filtered some junk earlier.
    return new_uri if breadcrumbs.empty?

    # Replace the original query parameters with the sanitized ones
    case unsafe_host
    when .ends_with?("youtube.com")
      # NOTE: this suffix match also accepts look-alike hosts, which is
      # harmless here because the host is never part of the result.

      # Use our sanitized path (not forgetting the leading '/')
      new_uri.path = "/#{breadcrumbs.join('/')}"

      # Then determine which params are allowed, and copy them over
      if allowed = determine_allowed(breadcrumbs[0])
        new_uri.query_params = copy_params(unsafe_uri.query_params, allowed)
      end
    when "youtu.be"
      # Always redirect to the watch page
      new_uri.path = "/watch"

      new_params = copy_params(unsafe_uri.query_params, :watch)

      # The video ID is the first path component. It has to be stored under
      # "v" (the watch page's video ID parameter, see ALLOWED_QUERY_PARAMS),
      # and it takes precedence over a "v" copied from the query string.
      new_params["v"] = breadcrumbs[0]

      new_uri.query_params = new_params
    end

    return new_uri
  end
end
|
Loading…
Reference in New Issue
Block a user