diff --git a/shard.yml b/shard.yml index e1420d6..67f01b8 100644 --- a/shard.yml +++ b/shard.yml @@ -1,5 +1,5 @@ name: protodec -version: 0.1.0 +version: 0.1.1 authors: - Omar Roth diff --git a/spec/protodec_spec.cr b/spec/protodec_spec.cr new file mode 100644 index 0000000..8575d4b --- /dev/null +++ b/spec/protodec_spec.cr @@ -0,0 +1,29 @@ +require "./spec_helper" + +describe Protodec do + it "decodes Base64 data" do + input = "4qmFsgIrEhhVQ0NqOTU2SUY2MkZiVDdHb3VzemFqOXcaD0VnbGpiMjF0ZFc1cGRIaw" + output = input.strip + .try { |i| URI.decode_www_form(i) } + .try { |i| URI.decode_www_form(i) } + .try { |i| Base64.decode(i) } + .try { |i| IO::Memory.new(i) } + .try { |i| Protodec::Any.parse(i) } + + output["80226972:0:embedded"]["2:0:string"].should eq("UCCj956IF62FbT7Gouszaj9w") + output["80226972:0:embedded"]["3:1:base64"]["2:0:string"].should eq("community") + end + + it "encodes JSON object" do + object = Protodec::Any.cast_json({ + "80226972:0:embedded" => { + "2:0:string" => "UCCj956IF62FbT7Gouszaj9w", + "3:1:base64" => { + "2:0:string" => "community", + }, + }, + }) + + Base64.urlsafe_encode(Protodec::Any.from_json(object), padding: false).should eq("4qmFsgIrEhhVQ0NqOTU2SUY2MkZiVDdHb3VzemFqOXcaD0VnbGpiMjF0ZFc1cGRIaw") + end +end diff --git a/spec/spec_helper.cr b/spec/spec_helper.cr new file mode 100644 index 0000000..1fdc497 --- /dev/null +++ b/spec/spec_helper.cr @@ -0,0 +1,2 @@ +require "spec" +require "../src/protodec/utils" diff --git a/src/protodec.cr b/src/protodec.cr index 575f710..06962b5 100644 --- a/src/protodec.cr +++ b/src/protodec.cr @@ -14,10 +14,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -require "base64" -require "json" require "option_parser" -require "uri" +require "./protodec/utils" CURRENT_BRANCH = {{ "#{`git branch | sed -n '/\* /s///p'`.strip}" }} CURRENT_COMMIT = {{ "#{`git rev-list HEAD --max-count=1 --abbrev-commit`.strip}" }} @@ -29,229 +27,6 @@ SOFTWARE = { "branch" => "#{CURRENT_BRANCH}", } -struct VarLong - def self.from_io(io : IO, format = IO::ByteFormat::NetworkEndian) : Int64 - result = 0_i64 - num_read = 0 - - loop do - byte = io.read_byte - raise "Invalid VarLong" if !byte - value = byte & 0x7f - - result |= value.to_i64 << (7 * num_read) - num_read += 1 - - break if byte & 0x80 == 0 - raise "Invalid VarLong" if num_read > 10 - end - - result - end - - def self.to_io(io : IO, value : Int64) - io.write_byte 0x00 if value == 0x00 - value = value.to_u64! - - while value != 0 - byte = (value & 0x7f).to_u8! - value >>= 7 - - if value != 0 - byte |= 0x80 - end - - io.write_byte byte - end - end -end - -struct ProtoBuf::Any - enum Tag - VarInt = 0 - Bit64 = 1 - LengthDelimited = 2 - Bit32 = 5 - end - - TAG_MAP = { - "varint" => 0, - "float32" => 5, - "int32" => 5, - "float64" => 1, - "int64" => 1, - "string" => 2, - "embedded" => 2, - "base64" => 2, - "bytes" => 2, - } - - alias Type = Int64 | - Float64 | - Array(UInt8) | - String | - Hash(String, Type) - - getter raw : Type - - def initialize(@raw : Type) - end - - def self.parse(io : IO) - from_io(io, ignore_exceptions: true) - end - - def self.from_io(io : IO, format = IO::ByteFormat::NetworkEndian, ignore_exceptions = false) - item = new({} of String => Type) - index = 0 - - begin - until io.pos == io.size - header = io.read_bytes(VarLong) - field = (header >> 3).to_i - type = Tag.new((header & 0b111).to_i) - - case type - when Tag::VarInt - value = io.read_bytes(VarLong) - key = "#{field}:#{index}:varint" - when Tag::Bit32 - value = io.read_bytes(Int32) - bytes = IO::Memory.new - value.to_io(bytes, IO::ByteFormat::LittleEndian) - bytes.rewind - - begin - value = bytes.read_bytes(Float32, format: IO::ByteFormat::LittleEndian).to_f64 - key = "#{field}:#{index}:float32" - rescue ex - value = value.to_i64 - key = "#{field}:#{index}:int32" - end - when Tag::Bit64 - value = io.read_bytes(Int64) - bytes = IO::Memory.new - value.to_io(bytes, IO::ByteFormat::LittleEndian) - bytes.rewind - - begin - value = bytes.read_bytes(Float64, format: IO::ByteFormat::LittleEndian) - key = "#{field}:#{index}:float64" - rescue ex - key = "#{field}:#{index}:int64" - end - when Tag::LengthDelimited - size = io.read_bytes(VarLong) - raise "Invalid size" if size > 2**22 - - bytes = Bytes.new(size) - io.read_fully(bytes) - - value = String.new(bytes) - if value.empty? - value = "" - key = "#{field}:#{index}:string" - elsif value.valid_encoding? && !value.codepoints.any? { |codepoint| - (0x00..0x1f).includes?(codepoint) && - !{0x09, 0x0a, 0x0d}.includes?(codepoint) - } - begin - value = from_io(IO::Memory.new(Base64.decode(URI.decode_www_form(URI.decode_www_form(value))))).raw - key = "#{field}:#{index}:base64" - rescue ex - key = "#{field}:#{index}:string" - end - else - begin - value = from_io(IO::Memory.new(bytes)).raw - key = "#{field}:#{index}:embedded" - rescue ex - value = bytes.to_a - key = "#{field}:#{index}:bytes" - end - end - else - raise "Invalid type #{type}" - end - - item[key] = value.as(Type) - index += 1 - end - rescue ex - raise ex if !ignore_exceptions - end - - item - end - - def []=(key : String, value : Type) - case object = @raw - when Hash - object[key] = value - else - raise "Expected Hash for #[]=(key : String, value : Type), not #{object.class}" - end - end - - def to_json - raw.to_json - end - - def to_json(json) - raw.to_json(json) - end - - def self.from_json(json : JSON::Any, io : IO, format = IO::ByteFormat::NetworkEndian) - case object = json.raw - when Hash - object.each do |key, value| - parts = key.split(":") - field = parts[0].to_i64 - type = parts[-1] - - header = (field << 3) | TAG_MAP[type] - VarLong.to_io(io, header) - - case type - when "varint" - VarLong.to_io(io, value.raw.as(Number).to_i64!) - when "int32" - value.raw.as(Number).to_i32!.to_io(io, IO::ByteFormat::LittleEndian) - when "float32" - value.raw.as(Number).to_f32!.to_io(io, IO::ByteFormat::LittleEndian) - when "int64" - value.raw.as(Number).to_i64!.to_io(io, IO::ByteFormat::LittleEndian) - when "float64" - value.raw.as(Number).to_f64!.to_io(io, IO::ByteFormat::LittleEndian) - when "string" - VarLong.to_io(io, value.as_s.bytesize.to_i64) - io.print value.as_s - when "base64" - buffer = IO::Memory.new - from_json(value, buffer) - buffer.rewind - - buffer = Base64.urlsafe_encode(buffer, padding: false) - VarLong.to_io(io, buffer.bytesize.to_i64) - buffer.to_s(io) - when "embedded" - buffer = IO::Memory.new - from_json(value, buffer) - buffer.rewind - - VarLong.to_io(io, buffer.bytesize.to_i64) - IO.copy(buffer, io) - when "bytes" - VarLong.to_io(io, value.size.to_i64) - value.as_a.each { |byte| io.write_byte byte.as_i.to_u8 } - end - end - else - raise "Invalid value #{json}" - end - end -end - enum IOType Base64 Hex @@ -321,15 +96,15 @@ end case input_type when IOType::Base64 - output = ProtoBuf::Any.parse(IO::Memory.new(Base64.decode(URI.decode_www_form(URI.decode_www_form(STDIN.gets_to_end.strip))))) + output = Protodec::Any.parse(IO::Memory.new(Base64.decode(URI.decode_www_form(URI.decode_www_form(STDIN.gets_to_end.strip))))) when IOType::Hex array = STDIN.gets_to_end.strip.split(/[- ,]+/).map &.to_i(16).to_u8 - output = ProtoBuf::Any.parse(IO::Memory.new(Slice.new(array.size) { |i| array[i] })) + output = Protodec::Any.parse(IO::Memory.new(Slice.new(array.size) { |i| array[i] })) when IOType::Raw - output = ProtoBuf::Any.parse(IO::Memory.new(STDIN.gets_to_end)) + output = Protodec::Any.parse(IO::Memory.new(STDIN.gets_to_end)) when IOType::Json, IOType::JsonPretty output = IO::Memory.new - ProtoBuf::Any.from_json(JSON.parse(STDIN), output) + Protodec::Any.from_json(JSON.parse(STDIN), output) else output = "" end @@ -342,7 +117,7 @@ when IOType::Hex when IOType::Raw STDOUT.write output.as(IO).to_slice when IOType::Json - STDOUT.puts output.as(ProtoBuf::Any).to_json + STDOUT.puts output.as(Protodec::Any).to_json when IOType::JsonPretty - STDOUT.puts output.as(ProtoBuf::Any).to_pretty_json + STDOUT.puts output.as(Protodec::Any).to_pretty_json end diff --git a/src/protodec/utils.cr b/src/protodec/utils.cr new file mode 100644 index 0000000..df9faf3 --- /dev/null +++ b/src/protodec/utils.cr @@ -0,0 +1,481 @@ +require "base64" +require "json" +require "uri" + +module Protodec + struct VarLong + def self.from_io(io : IO, format = IO::ByteFormat::NetworkEndian) : Int64 + result = 0_i64 + num_read = 0 + + loop do + byte = io.read_byte + raise "Invalid VarLong" if !byte + value = byte & 0x7f + + result |= value.to_i64 << (7 * num_read) + num_read += 1 + + break if byte & 0x80 == 0 + raise "Invalid VarLong" if num_read > 10 + end + + result + end + + def self.to_io(io : IO, value : Int64) + io.write_byte 0x00 if value == 0x00 + value = value.to_u64! + + while value != 0 + byte = (value & 0x7f).to_u8! + value >>= 7 + + if value != 0 + byte |= 0x80 + end + + io.write_byte byte + end + end + end + + struct Any + enum Tag + VarInt = 0 + Bit64 = 1 + LengthDelimited = 2 + Bit32 = 5 + end + + TAG_MAP = { + "varint" => 0, + "float32" => 5, + "int32" => 5, + "float64" => 1, + "int64" => 1, + "string" => 2, + "embedded" => 2, + "base64" => 2, + "bytes" => 2, + } + + alias Type = Int32 | + Int64 | + Float64 | + String | + Array(UInt8) | + Hash(String, Any) + + getter raw : Type + + def initialize(@raw : Type) + end + + def self.parse(io : IO) + from_io(io, ignore_exceptions: true) + end + + def self.from_io(io : IO, format = IO::ByteFormat::NetworkEndian, ignore_exceptions = false) + item = new({} of String => Any) + index = 0 + + begin + until io.size == io.pos + begin + header = io.read_bytes(VarLong) + rescue ex + next + end + + field = (header >> 3).to_i + type = Tag.new((header & 0b111).to_i) + + case type + when Tag::VarInt + value = io.read_bytes(VarLong) + key = "#{field}:#{index}:varint" + when Tag::Bit32 + value = io.read_bytes(Int32) + bytes = IO::Memory.new + value.to_io(bytes, IO::ByteFormat::LittleEndian) + bytes.rewind + + begin + value = bytes.read_bytes(Float32, format: IO::ByteFormat::LittleEndian).to_f64 + key = "#{field}:#{index}:float32" + rescue ex + value = value.to_i64 + key = "#{field}:#{index}:int32" + end + when Tag::Bit64 + value = io.read_bytes(Int64) + bytes = IO::Memory.new + value.to_io(bytes, IO::ByteFormat::LittleEndian) + bytes.rewind + + begin + value = bytes.read_bytes(Float64, format: IO::ByteFormat::LittleEndian) + key = "#{field}:#{index}:float64" + rescue ex + key = "#{field}:#{index}:int64" + end + when Tag::LengthDelimited + size = io.read_bytes(VarLong) + raise "Invalid size" if size > 2**22 + + bytes = Bytes.new(size) + io.read_fully(bytes) + + value = String.new(bytes) + if value.empty? + value = "" + key = "#{field}:#{index}:string" + elsif value.valid_encoding? && !value.codepoints.any? { |codepoint| + (0x00..0x1f).includes?(codepoint) && + !{0x09, 0x0a, 0x0d}.includes?(codepoint) + } + begin + value = from_io(IO::Memory.new(Base64.decode(URI.decode_www_form(URI.decode_www_form(value))))).raw + key = "#{field}:#{index}:base64" + rescue ex + key = "#{field}:#{index}:string" + end + else + begin + value = from_io(IO::Memory.new(bytes)).raw + key = "#{field}:#{index}:embedded" + rescue ex + value = bytes.to_a + key = "#{field}:#{index}:bytes" + end + end + else + raise "Invalid type #{type}" + end + + item[key] = value.as(Type) + index += 1 + end + rescue ex + raise ex if !ignore_exceptions + end + + item + end + + def self.from_json(json : JSON::Any, format = IO::ByteFormat::NetworkEndian) : Bytes + io = IO::Memory.new + from_json(json, io, format) + return io.to_slice + end + + def self.from_json(json : JSON::Any, io : IO, format = IO::ByteFormat::NetworkEndian) + case object = json.raw + when Hash + object.each do |key, value| + parts = key.split(":") + field = parts[0].to_i64 + type = parts[-1] + + header = (field << 3) | TAG_MAP[type] + VarLong.to_io(io, header) + + case type + when "varint" + VarLong.to_io(io, value.raw.as(Number).to_i64!) + when "int32" + value.raw.as(Number).to_i32!.to_io(io, IO::ByteFormat::LittleEndian) + when "float32" + value.raw.as(Number).to_f32!.to_io(io, IO::ByteFormat::LittleEndian) + when "int64" + value.raw.as(Number).to_i64!.to_io(io, IO::ByteFormat::LittleEndian) + when "float64" + value.raw.as(Number).to_f64!.to_io(io, IO::ByteFormat::LittleEndian) + when "string" + VarLong.to_io(io, value.as_s.bytesize.to_i64) + io.print value.as_s + when "base64" + buffer = IO::Memory.new + from_json(value, buffer) + buffer.rewind + + buffer = Base64.urlsafe_encode(buffer, padding: false) + VarLong.to_io(io, buffer.bytesize.to_i64) + buffer.to_s(io) + when "embedded" + buffer = IO::Memory.new + from_json(value, buffer) + buffer.rewind + + VarLong.to_io(io, buffer.bytesize.to_i64) + IO.copy(buffer, io) + when "bytes" + VarLong.to_io(io, value.size.to_i64) + value.as_a.each { |byte| io.write_byte byte.as_i.to_u8 } + end + end + else + raise "Invalid value #{json}" + end + end + + # Assumes the underlying value is an `Array` or `Hash` and returns its size. + # Raises if the underlying value is not an `Array` or `Hash`. + def size : Int + case object = @raw + when Array + object.size + when Hash + object.size + else + raise "Expected Array or Hash for #size, not #{object.class}" + end + end + + # Assumes the underlying value is an `Array` and returns the element + # at the given index. + # Raises if the underlying value is not an `Array`. + # def [](index : Int) : Any + # case object = @raw + # when Array + # object[index] + # else + # raise "Expected Array for #[](index : Int), not #{object.class}" + # end + # end + + def []=(key : String, value : Type) + case object = @raw + when Hash + object[key] = Protodec::Any.new(value) + else + raise "Expected Hash for #[]=(key : String, value : Type), not #{object.class}" + end + end + + # Assumes the underlying value is an `Array` and returns the element + # at the given index, or `nil` if out of bounds. + # Raises if the underlying value is not an `Array`. + def []?(index : Int) : Protodec::Any? + case object = @raw + when Array + object[index]? + else + raise "Expected Array for #[]?(index : Int), not #{object.class}" + end + end + + # Assumes the underlying value is a `Hash` and returns the element + # with the given key. + # Raises if the underlying value is not a `Hash`. + def [](key : String) : Protodec::Any + case object = @raw + when Hash + object[key].as(Protodec::Any) + else + raise "Expected Hash for #[](key : String), not #{object.class}" + end + end + + # Assumes the underlying value is a `Hash` and returns the element + # with the given key, or `nil` if the key is not present. + # Raises if the underlying value is not a `Hash`. + def []?(key : String) : Protodec::Any? + case object = @raw + when Hash + object[key]? + else + raise "Expected Hash for #[]?(key : String), not #{object.class}" + end + end + + # Traverses the depth of a structure and returns the value. + # Returns `nil` if not found. + def dig?(key : String | Int, *subkeys) + if (value = self[key]?) && value.responds_to?(:dig?) + value.dig?(*subkeys) + end + end + + # :nodoc: + def dig?(key : String | Int) + self[key]? + end + + # Traverses the depth of a structure and returns the value, otherwise raises. + def dig(key : String | Int, *subkeys) + if (value = self[key]) && value.responds_to?(:dig) + return value.dig(*subkeys) + end + raise "Protodec::Any value not diggable for key: #{key.inspect}" + end + + # :nodoc: + def dig(key : String | Int) + self[key] + end + + # Checks that the underlying value is `Nil`, and returns `nil`. + # Raises otherwise. + def as_nil : Nil + @raw.as(Nil) + end + + # Checks that the underlying value is `Bool`, and returns its value. + # Raises otherwise. + def as_bool : Bool + @raw.as(Bool) + end + + # Checks that the underlying value is `Bool`, and returns its value. + # Returns `nil` otherwise. + def as_bool? : Bool? + as_bool if @raw.is_a?(Bool) + end + + # Checks that the underlying value is `Int`, and returns its value as an `Int32`. + # Raises otherwise. + def as_i : Int32 + @raw.as(Int).to_i + end + + # Checks that the underlying value is `Int`, and returns its value as an `Int32`. + # Returns `nil` otherwise. + def as_i? : Int32? + as_i if @raw.is_a?(Int) + end + + # Checks that the underlying value is `Int`, and returns its value as an `Int64`. + # Raises otherwise. + def as_i64 : Int64 + @raw.as(Int).to_i64 + end + + # Checks that the underlying value is `Int`, and returns its value as an `Int64`. + # Returns `nil` otherwise. + def as_i64? : Int64? + as_i64 if @raw.is_a?(Int64) + end + + # Checks that the underlying value is `Float`, and returns its value as an `Float64`. + # Raises otherwise. + def as_f : Float64 + @raw.as(Float64) + end + + # Checks that the underlying value is `Float`, and returns its value as an `Float64`. + # Returns `nil` otherwise. + def as_f? : Float64? + @raw.as?(Float64) + end + + # Checks that the underlying value is `Float`, and returns its value as an `Float32`. + # Raises otherwise. + def as_f32 : Float32 + @raw.as(Float).to_f32 + end + + # Checks that the underlying value is `Float`, and returns its value as an `Float32`. + # Returns `nil` otherwise. + def as_f32? : Float32? + as_f32 if @raw.is_a?(Float) + end + + # Checks that the underlying value is `String`, and returns its value. + # Raises otherwise. + def as_s : String + @raw.as(String) + end + + # Checks that the underlying value is `String`, and returns its value. + # Returns `nil` otherwise. + def as_s? : String? + as_s if @raw.is_a?(String) + end + + # Checks that the underlying value is `Array`, and returns its value. + # Raises otherwise. + def as_a : Array(Any) + @raw.as(Array) + end + + # Checks that the underlying value is `Array`, and returns its value. + # Returns `nil` otherwise. + def as_a? : Array(Any)? + as_a if @raw.is_a?(Array) + end + + # Checks that the underlying value is `Hash`, and returns its value. + # Raises otherwise. + def as_h : Hash(String, Any) + @raw.as(Hash) + end + + # Checks that the underlying value is `Hash`, and returns its value. + # Returns `nil` otherwise. + def as_h? : Hash(String, Any)? + as_h if @raw.is_a?(Hash) + end + + # :nodoc: + def inspect(io : IO) : Nil + @raw.inspect(io) + end + + # :nodoc: + def to_s(io : IO) : Nil + @raw.to_s(io) + end + + # :nodoc: + def pretty_print(pp) + @raw.pretty_print(pp) + end + + # Returns `true` if both `self` and *other*'s raw object are equal. + def ==(other : Protodec::Any) + raw == other.raw + end + + # Returns `true` if the raw object is equal to *other*. + def ==(other) + raw == other + end + + # See `Object#hash(hasher)` + def_hash raw + + # :nodoc: + def to_json(json : JSON::Builder) + raw.to_json(json) + end + + def to_yaml(yaml : YAML::Nodes::Builder) + raw.to_yaml(yaml) + end + + # Returns a new Protodec::Any instance with the `raw` value `dup`ed. + def dup + Any.new(raw.dup) + end + + # Returns a new Protodec::Any instance with the `raw` value `clone`ed. + def clone + Any.new(raw.clone) + end + + def self.cast_json(object) + raise "Invalid type" if !object.is_a?(Hash) + + JSON::Any.new(object.transform_values do |value| + case value + when .is_a?(Hash) + cast_json(value) + else + JSON::Any.new(value) + end + end) + end + end +end