diff --git a/app/models/changeset.rb b/app/models/changeset.rb index 9c0dc68de..869e9bad9 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -255,8 +255,8 @@ class Changeset < ActiveRecord::Base str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) return str end + enc = encoding.blank? ? "UTF-8" : encoding if str.respond_to?(:force_encoding) - enc = encoding.blank? ? "UTF-8" : encoding if enc != "UTF-8" str.force_encoding(enc) str = str.encode("UTF-8", :invalid => :replace, @@ -269,19 +269,18 @@ class Changeset < ActiveRecord::Base end end else - unless encoding.blank? || encoding == 'UTF-8' - begin - str = Iconv.conv('UTF-8', encoding, str) - rescue Iconv::Failure - # do nothing here - end - end - # removes invalid UTF8 sequences + ic = Iconv.new('UTF-8', enc) + txtar = "" begin - str = Iconv.conv('UTF-8//IGNORE', 'UTF-8', str + ' ')[0..-3] - rescue Iconv::InvalidEncoding - # "UTF-8//IGNORE" is not supported on some OS + txtar += ic.iconv(str) + rescue Iconv::IllegalSequence + txtar += $!.success + str = '?' + $!.failed[1,$!.failed.length] + retry + rescue + txtar += $!.success end + str = txtar end str end diff --git a/test/unit/changeset_test.rb b/test/unit/changeset_test.rb index 93027d849..8e98b0574 100644 --- a/test/unit/changeset_test.rb +++ b/test/unit/changeset_test.rb @@ -21,7 +21,8 @@ require File.expand_path('../../test_helper', __FILE__) class ChangesetTest < ActiveSupport::TestCase fixtures :projects, :repositories, :issues, :issue_statuses, - :changesets, :changes, :issue_categories, :enumerations, :custom_fields, :custom_values, :users, :members, :member_roles, :trackers + :changesets, :changes, :issue_categories, :enumerations, + :custom_fields, :custom_values, :users, :members, :member_roles, :trackers def setup end @@ -250,29 +251,26 @@ class ChangesetTest < ActiveSupport::TestCase assert_equal str_utf8, c.comments end - def test_invalid_utf8_sequences_in_comments_should_be_stripped + def test_invalid_utf8_sequences_in_comments_should_be_replaced_latin1 proj = Project.find(3) # str = File.read("#{RAILS_ROOT}/test/fixtures/encoding/iso-8859-1.txt") str = "Texte encod\xe9 en ISO-8859-1." str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding) r = Repository::Bazaar.create!( - :project => proj, :url => '/tmp/test/bazaar', + :project => proj, + :url => '/tmp/test/bazaar', :log_encoding => 'UTF-8' ) assert r - c = Changeset.new(:repository => r, + c = Changeset.new(:repository => r, :committed_on => Time.now, - :revision => '123', - :scmid => '12345', - :comments => str) + :revision => '123', + :scmid => '12345', + :comments => str) assert( c.save ) - if str.respond_to?(:force_encoding) - assert_equal "Texte encod? en ISO-8859-1.", c.comments - else - assert_equal "Texte encod en ISO-8859-1.", c.comments - end + assert_equal "Texte encod? en ISO-8859-1.", c.comments end - def test_invalid_utf8_sequences_in_comments_should_be_stripped_ja_jis + def test_invalid_utf8_sequences_in_comments_should_be_replaced_ja_jis proj = Project.find(3) str = "test\xb5\xfetest\xb5\xfe" if str.respond_to?(:force_encoding) @@ -280,7 +278,7 @@ class ChangesetTest < ActiveSupport::TestCase end r = Repository::Bazaar.create!( :project => proj, - :url => '/tmp/test/bazaar', + :url => '/tmp/test/bazaar', :log_encoding => 'ISO-2022-JP' ) assert r c = Changeset.new(:repository => r, @@ -289,11 +287,7 @@ class ChangesetTest < ActiveSupport::TestCase :scmid => '12345', :comments => str) assert( c.save ) - if str.respond_to?(:force_encoding) - assert_equal "test??test??", c.comments - else - assert_equal "testtest", c.comments - end + assert_equal "test??test??", c.comments end def test_comments_should_be_converted_all_latin1_to_utf8