Patch summary (free text before the first "diff --git" header).

app/models/changeset.rb, Changeset.to_utf8(str, encoding):
  * nil is returned unchanged; the early return now tests nil?/empty?
    instead of blank?.
  * When String#force_encoding is available, the string is tagged with the
    declared source encoding and transcoded with String#encode; Iconv is no
    longer called on that path.
  * The Iconv conversion moves into the else branch, used when
    String#force_encoding is not available.

test/unit/changeset_test.rb:
  * s4 is assigned before the respond_to?(:force_encoding) check, and the
    final assertion compares c.comments against s4 instead of s2.

NOTE(review): the line structure and indentation below were reconstructed
from a whitespace-collapsed copy so that each hunk matches its @@ line
counts. Confirm leading whitespace against the target tree before applying,
or apply with --ignore-whitespace.

diff --git a/app/models/changeset.rb b/app/models/changeset.rb
index 4fdaf665..6a0f55c1 100644
--- a/app/models/changeset.rb
+++ b/app/models/changeset.rb
@@ -239,21 +239,28 @@ class Changeset < ActiveRecord::Base
   private
 
   def self.to_utf8(str, encoding)
-    return str if str.blank?
-    unless encoding.blank? || encoding == 'UTF-8'
-      begin
-        str = Iconv.conv('UTF-8', encoding, str)
-      rescue Iconv::Failure
-        # do nothing here
-      end
-    end
+    return str if str.nil?
+    str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding)
+    return str if str.empty?
+    str.force_encoding("UTF-8") if str.respond_to?(:force_encoding)
     if str.respond_to?(:force_encoding)
-      str.force_encoding('UTF-8')
+      enc = encoding.blank? ? "UTF-8" : encoding
+      if enc != "UTF-8"
+        str.force_encoding(enc)
+        str = str.encode("UTF-8")
+      end
       if ! str.valid_encoding?
         str = str.encode("US-ASCII", :invalid => :replace,
               :undef => :replace, :replace => '?').encode("UTF-8")
       end
     else
+      unless encoding.blank? || encoding == 'UTF-8'
+        begin
+          str = Iconv.conv('UTF-8', encoding, str)
+        rescue Iconv::Failure
+          # do nothing here
+        end
+      end
       # removes invalid UTF8 sequences
       begin
         str = Iconv.conv('UTF-8//IGNORE', 'UTF-8', str + '  ')[0..-3]
diff --git a/test/unit/changeset_test.rb b/test/unit/changeset_test.rb
index 34520b26..0c2c27c0 100644
--- a/test/unit/changeset_test.rb
+++ b/test/unit/changeset_test.rb
@@ -258,9 +258,9 @@ class ChangesetTest < ActiveSupport::TestCase
   def test_comments_should_be_converted_all_latin1_to_utf8
     s1 = "\xC2\x80"
     s2 = "\xc3\x82\xc2\x80"
+    s4 = s2.dup
     if s1.respond_to?(:force_encoding)
       s3 = s1.dup
-      s4 = s2.dup
       s1.force_encoding('ASCII-8BIT')
       s2.force_encoding('ASCII-8BIT')
       s3.force_encoding('ISO-8859-1')
@@ -278,7 +278,7 @@ class ChangesetTest < ActiveSupport::TestCase
                       :scmid => '12345',
                       :comments => s1)
     assert( c.save )
-    assert_equal s2, c.comments
+    assert_equal s4, c.comments
   end
 
   def test_identifier