From 81bdbd77f7c976ae6e5b503139e8912694bd944b Mon Sep 17 00:00:00 2001 From: Toshi MARUYAMA Date: Sat, 9 Apr 2011 05:41:12 +0000 Subject: [PATCH 1/9] scm: not use Iconv for log converting in Ruby 1.9 and fix tests fails in Ruby 1.9. git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@5367 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- app/models/changeset.rb | 25 ++++++++++++++++--------- test/unit/changeset_test.rb | 4 ++-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index 4fdaf665..6a0f55c1 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -239,21 +239,28 @@ class Changeset < ActiveRecord::Base private def self.to_utf8(str, encoding) - return str if str.blank? - unless encoding.blank? || encoding == 'UTF-8' - begin - str = Iconv.conv('UTF-8', encoding, str) - rescue Iconv::Failure - # do nothing here - end - end + return str if str.nil? + str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding) + return str if str.empty? + str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) if str.respond_to?(:force_encoding) - str.force_encoding('UTF-8') + enc = encoding.blank? ? "UTF-8" : encoding + if enc != "UTF-8" + str.force_encoding(enc) + str = str.encode("UTF-8") + end if ! str.valid_encoding? str = str.encode("US-ASCII", :invalid => :replace, :undef => :replace, :replace => '?').encode("UTF-8") end else + unless encoding.blank? || encoding == 'UTF-8' + begin + str = Iconv.conv('UTF-8', encoding, str) + rescue Iconv::Failure + # do nothing here + end + end # removes invalid UTF8 sequences begin str = Iconv.conv('UTF-8//IGNORE', 'UTF-8', str + ' ')[0..-3] diff --git a/test/unit/changeset_test.rb b/test/unit/changeset_test.rb index 34520b26..0c2c27c0 100644 --- a/test/unit/changeset_test.rb +++ b/test/unit/changeset_test.rb @@ -258,9 +258,9 @@ class ChangesetTest < ActiveSupport::TestCase def test_comments_should_be_converted_all_latin1_to_utf8 s1 = "\xC2\x80" s2 = "\xc3\x82\xc2\x80" + s4 = s2.dup if s1.respond_to?(:force_encoding) s3 = s1.dup - s4 = s2.dup s1.force_encoding('ASCII-8BIT') s2.force_encoding('ASCII-8BIT') s3.force_encoding('ISO-8859-1') @@ -278,7 +278,7 @@ class ChangesetTest < ActiveSupport::TestCase :scmid => '12345', :comments => s1) assert( c.save ) - assert_equal s2, c.comments + assert_equal s4, c.comments end def test_identifier From 60b34ea29b9687941037b4a3d6d79914a1a0e401 Mon Sep 17 00:00:00 2001 From: Toshi MARUYAMA Date: Sat, 9 Apr 2011 06:34:33 +0000 Subject: [PATCH 2/9] scm: set empty log encoding UTF-8 in Ruby 1.9 and add tests. git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@5368 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- app/models/changeset.rb | 5 ++++- test/unit/changeset_test.rb | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index 6a0f55c1..c227b7ec 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -241,7 +241,10 @@ class Changeset < ActiveRecord::Base def self.to_utf8(str, encoding) return str if str.nil? str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding) - return str if str.empty? + if str.empty? + str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) + return str + end str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) if str.respond_to?(:force_encoding) enc = encoding.blank? ? "UTF-8" : encoding diff --git a/test/unit/changeset_test.rb b/test/unit/changeset_test.rb index 0c2c27c0..1c64d434 100644 --- a/test/unit/changeset_test.rb +++ b/test/unit/changeset_test.rb @@ -281,6 +281,42 @@ class ChangesetTest < ActiveSupport::TestCase assert_equal s4, c.comments end + def test_comments_nil + proj = Project.find(3) + r = Repository::Bazaar.create!( + :project => proj, :url => '/tmp/test/bazaar', + :log_encoding => 'ISO-8859-1' ) + assert r + c = Changeset.new(:repository => r, + :committed_on => Time.now, + :revision => '123', + :scmid => '12345', + :comments => nil) + assert( c.save ) + assert_equal "", c.comments + if c.comments.respond_to?(:force_encoding) + assert_equal "UTF-8", c.comments.encoding.to_s + end + end + + def test_comments_empty + proj = Project.find(3) + r = Repository::Bazaar.create!( + :project => proj, :url => '/tmp/test/bazaar', + :log_encoding => 'ISO-8859-1' ) + assert r + c = Changeset.new(:repository => r, + :committed_on => Time.now, + :revision => '123', + :scmid => '12345', + :comments => "") + assert( c.save ) + assert_equal "", c.comments + if c.comments.respond_to?(:force_encoding) + assert_equal "UTF-8", c.comments.encoding.to_s + end + end + def test_identifier c = Changeset.find_by_revision('1') assert_equal c.revision, c.identifier From 55111556748e2ed3a109f86f911df178ea6f26d9 Mon Sep 17 00:00:00 2001 From: Toshi MARUYAMA Date: Sat, 9 Apr 2011 08:07:22 +0000 Subject: [PATCH 3/9] scm: fix log converting error in Ruby 1.9 and add more tests. git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@5370 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- app/models/changeset.rb | 3 ++- test/unit/changeset_test.rb | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index c227b7ec..9220282d 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -250,7 +250,8 @@ class Changeset < ActiveRecord::Base enc = encoding.blank? ? "UTF-8" : encoding if enc != "UTF-8" str.force_encoding(enc) - str = str.encode("UTF-8") + str = str.encode("UTF-8", :invalid => :replace, + :undef => :replace, :replace => '?') end if ! str.valid_encoding? str = str.encode("US-ASCII", :invalid => :replace, diff --git a/test/unit/changeset_test.rb b/test/unit/changeset_test.rb index 1c64d434..92a9aeb9 100644 --- a/test/unit/changeset_test.rb +++ b/test/unit/changeset_test.rb @@ -255,6 +255,30 @@ class ChangesetTest < ActiveSupport::TestCase end end + def test_invalid_utf8_sequences_in_comments_should_be_stripped_ja_jis + proj = Project.find(3) + str = "test\xb5\xfetest\xb5\xfe" + if str.respond_to?(:force_encoding) + str.force_encoding('ASCII-8BIT') + end + r = Repository::Bazaar.create!( + :project => proj, + :url => '/tmp/test/bazaar', + :log_encoding => 'ISO-2022-JP' ) + assert r + c = Changeset.new(:repository => r, + :committed_on => Time.now, + :revision => '123', + :scmid => '12345', + :comments => str) + assert( c.save ) + if str.respond_to?(:force_encoding) + assert_equal "test??test??", c.comments + else + assert_equal "testtest", c.comments + end + end + def test_comments_should_be_converted_all_latin1_to_utf8 s1 = "\xC2\x80" s2 = "\xc3\x82\xc2\x80" From 0adf1a87b1a138ae60f58290e1b018a13222210c Mon Sep 17 00:00:00 2001 From: Toshi MARUYAMA Date: Sat, 9 Apr 2011 08:19:55 +0000 Subject: [PATCH 4/9] scm: code clean up app/models/changeset.rb. git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@5371 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- app/models/changeset.rb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index 9220282d..ad4ac3b2 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -245,17 +245,18 @@ class Changeset < ActiveRecord::Base str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) return str end - str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) if str.respond_to?(:force_encoding) enc = encoding.blank? ? "UTF-8" : encoding if enc != "UTF-8" str.force_encoding(enc) str = str.encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => '?') - end - if ! str.valid_encoding? - str = str.encode("US-ASCII", :invalid => :replace, - :undef => :replace, :replace => '?').encode("UTF-8") + else + str.force_encoding("UTF-8") + if ! str.valid_encoding? + str = str.encode("US-ASCII", :invalid => :replace, + :undef => :replace, :replace => '?').encode("UTF-8") + end end else unless encoding.blank? || encoding == 'UTF-8' From e19c56d61a73255f648a66156c6b0b70a684c312 Mon Sep 17 00:00:00 2001 From: Toshi MARUYAMA Date: Sat, 9 Apr 2011 09:31:14 +0000 Subject: [PATCH 5/9] scm: replace invalid utf-8 sequences in comments instead of stripping on Ruby 1.8. git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@5373 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- app/models/changeset.rb | 23 +++++++++++------------ test/unit/changeset_test.rb | 33 ++++++++++++++------------------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index ad4ac3b2..bdf25d39 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -245,8 +245,8 @@ class Changeset < ActiveRecord::Base str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) return str end + enc = encoding.blank? ? "UTF-8" : encoding if str.respond_to?(:force_encoding) - enc = encoding.blank? ? "UTF-8" : encoding if enc != "UTF-8" str.force_encoding(enc) str = str.encode("UTF-8", :invalid => :replace, @@ -259,19 +259,18 @@ class Changeset < ActiveRecord::Base end end else - unless encoding.blank? || encoding == 'UTF-8' - begin - str = Iconv.conv('UTF-8', encoding, str) - rescue Iconv::Failure - # do nothing here - end - end - # removes invalid UTF8 sequences + ic = Iconv.new('UTF-8', enc) + txtar = "" begin - str = Iconv.conv('UTF-8//IGNORE', 'UTF-8', str + ' ')[0..-3] - rescue Iconv::InvalidEncoding - # "UTF-8//IGNORE" is not supported on some OS + txtar += ic.iconv(str) + rescue Iconv::IllegalSequence + txtar += $!.success + str = '?' + $!.failed[1,$!.failed.length] + retry + rescue + txtar += $!.success end + str = txtar end str end diff --git a/test/unit/changeset_test.rb b/test/unit/changeset_test.rb index 92a9aeb9..8496238b 100644 --- a/test/unit/changeset_test.rb +++ b/test/unit/changeset_test.rb @@ -14,7 +14,9 @@ require File.expand_path('../../test_helper', __FILE__) class ChangesetTest < ActiveSupport::TestCase - fixtures :projects, :repositories, :issues, :issue_statuses, :changesets, :changes, :issue_categories, :enumerations, :custom_fields, :custom_values, :users, :members, :member_roles, :trackers + fixtures :projects, :repositories, :issues, :issue_statuses, + :changesets, :changes, :issue_categories, :enumerations, + :custom_fields, :custom_values, :users, :members, :member_roles, :trackers def setup end @@ -235,27 +237,24 @@ class ChangesetTest < ActiveSupport::TestCase assert_equal "Texte encodé en ISO-8859-1.", c.comments end - def test_invalid_utf8_sequences_in_comments_should_be_stripped + def test_invalid_utf8_sequences_in_comments_should_be_replaced_latin1 proj = Project.find(3) str = File.read("#{RAILS_ROOT}/test/fixtures/encoding/iso-8859-1.txt") r = Repository::Bazaar.create!( - :project => proj, :url => '/tmp/test/bazaar', + :project => proj, + :url => '/tmp/test/bazaar', :log_encoding => 'UTF-8' ) assert r - c = Changeset.new(:repository => r, + c = Changeset.new(:repository => r, :committed_on => Time.now, - :revision => '123', - :scmid => '12345', - :comments => str) + :revision => '123', + :scmid => '12345', + :comments => str) assert( c.save ) - if str.respond_to?(:force_encoding) - assert_equal "Texte encod? en ISO-8859-1.", c.comments - else - assert_equal "Texte encod en ISO-8859-1.", c.comments - end + assert_equal "Texte encod? en ISO-8859-1.", c.comments end - def test_invalid_utf8_sequences_in_comments_should_be_stripped_ja_jis + def test_invalid_utf8_sequences_in_comments_should_be_replaced_ja_jis proj = Project.find(3) str = "test\xb5\xfetest\xb5\xfe" if str.respond_to?(:force_encoding) @@ -263,7 +262,7 @@ class ChangesetTest < ActiveSupport::TestCase end r = Repository::Bazaar.create!( :project => proj, - :url => '/tmp/test/bazaar', + :url => '/tmp/test/bazaar', :log_encoding => 'ISO-2022-JP' ) assert r c = Changeset.new(:repository => r, @@ -272,11 +271,7 @@ class ChangesetTest < ActiveSupport::TestCase :scmid => '12345', :comments => str) assert( c.save ) - if str.respond_to?(:force_encoding) - assert_equal "test??test??", c.comments - else - assert_equal "testtest", c.comments - end + assert_equal "test??test??", c.comments end def test_comments_should_be_converted_all_latin1_to_utf8 From 6efba7b05d0438d3b9698316794029f220226ab7 Mon Sep 17 00:00:00 2001 From: Toshi MARUYAMA Date: Sat, 9 Apr 2011 23:21:41 +0000 Subject: [PATCH 6/9] scm: use upcase to compare encoding name "UTF-8" in log converting. git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@5375 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- app/models/changeset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index bdf25d39..6804869c 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -247,7 +247,7 @@ class Changeset < ActiveRecord::Base end enc = encoding.blank? ? "UTF-8" : encoding if str.respond_to?(:force_encoding) - if enc != "UTF-8" + if enc.upcase != "UTF-8" str.force_encoding(enc) str = str.encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => '?') From 0f7ae9b6583ecfc96600dc1497c56323fbdafefc Mon Sep 17 00:00:00 2001 From: Jean-Philippe Lang Date: Sun, 11 Apr 2010 13:55:30 +0000 Subject: [PATCH 7/9] Fixed: Update of Subversion changesets broken by r3466 under Solaris (#5255). git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@3635 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- app/models/changeset.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index 6804869c..971edaf5 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -272,6 +272,12 @@ class Changeset < ActiveRecord::Base end str = txtar end - str + # removes invalid UTF8 sequences + begin + Iconv.conv('UTF-8//IGNORE', 'UTF-8', str + ' ')[0..-3] + rescue Iconv::InvalidEncoding + # "UTF-8//IGNORE" is not supported on some OS + str + end end end From f518251d5f6a0e53ad6cc4cc188f66487cea4f8e Mon Sep 17 00:00:00 2001 From: Eric Davis Date: Fri, 3 Jun 2011 10:46:46 -0700 Subject: [PATCH 8/9] [#444] Fix Changeset#to_utf8 coding standards --- app/models/changeset.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index 971edaf5..bf6f6642 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -245,21 +245,21 @@ class Changeset < ActiveRecord::Base str.force_encoding("UTF-8") if str.respond_to?(:force_encoding) return str end - enc = encoding.blank? ? "UTF-8" : encoding + normalized_encoding = encoding.blank? ? "UTF-8" : encoding if str.respond_to?(:force_encoding) - if enc.upcase != "UTF-8" - str.force_encoding(enc) + if normalized_encoding.upcase != "UTF-8" + str.force_encoding(normalized_encoding) str = str.encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => '?') else str.force_encoding("UTF-8") - if ! str.valid_encoding? + unless str.valid_encoding? str = str.encode("US-ASCII", :invalid => :replace, :undef => :replace, :replace => '?').encode("UTF-8") end end else - ic = Iconv.new('UTF-8', enc) + ic = Iconv.new('UTF-8', normalized_encoding) txtar = "" begin txtar += ic.iconv(str) From ba3ddee99da5ec49aca90517233d7548947a2ea2 Mon Sep 17 00:00:00 2001 From: Eric Davis Date: Fri, 3 Jun 2011 10:50:16 -0700 Subject: [PATCH 9/9] [#444] Remove extra variable --- app/models/changeset.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/changeset.rb b/app/models/changeset.rb index bf6f6642..d6ab9142 100644 --- a/app/models/changeset.rb +++ b/app/models/changeset.rb @@ -259,10 +259,10 @@ class Changeset < ActiveRecord::Base end end else - ic = Iconv.new('UTF-8', normalized_encoding) + txtar = "" begin - txtar += ic.iconv(str) + txtar += Iconv.new('UTF-8', normalized_encoding).iconv(str) rescue Iconv::IllegalSequence txtar += $!.success str = '?' + $!.failed[1,$!.failed.length]