From ef49d5cb77f312ddf918fc4ae42b7464e313b94e Mon Sep 17 00:00:00 2001 From: ollietulloch Date: Mon, 7 Oct 2024 19:33:48 +0100 Subject: [PATCH 1/5] Add column/data zipping functionality --- CHANGELOG.md | 2 +- lib/ndr_import/mapper.rb | 12 +++++++++++- test/mapper_test.rb | 30 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e09537..6e341b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## [Unreleased] ======= -* no unreleased changes * +* Column zipping functionality * ## 11.2.0 / 2024-04-10 ### Added diff --git a/lib/ndr_import/mapper.rb b/lib/ndr_import/mapper.rb index bec7ac4..3aa80ce 100644 --- a/lib/ndr_import/mapper.rb +++ b/lib/ndr_import/mapper.rb @@ -30,6 +30,8 @@ module Strings STANDARD_MAPPING = 'standard_mapping'.freeze UNPACK_PATTERN = 'unpack_pattern'.freeze VALIDATES = 'validates'.freeze + ZIP_ORDER = 'zip_order'.freeze + SPLIT_CHAR = 'split_char'.freeze end private @@ -137,7 +139,8 @@ def mapped_line(line, line_mappings) data[field] ||= {} data[field][:values] ||= [] # "better" values come earlier - data[field][:compact] = true unless data[field].key?(:compact) + data[field][:zipped_values] ||= [] + data[field][:compact] = true unless data[field].key?(:compact) if field_mapping[Strings::ORDER] data[field][:join] ||= field_mapping[Strings::JOIN] @@ -148,6 +151,9 @@ def mapped_line(line, line_mappings) data[field][:values][field_mapping[Strings::ORDER] - 1] = value elsif field_mapping[Strings::PRIORITY] data[field][:values][field_mapping[Strings::PRIORITY]] = value + elsif field_mapping[Strings::ZIP_ORDER] + data[field][:split_char] ||= field_mapping[Strings::SPLIT_CHAR] + data[field][:zipped_values][field_mapping[Strings::ZIP_ORDER] - 1] = value else data[field][:values].unshift(value) # new "best" value end @@ -160,6 +166,7 @@ def mapped_line(line, line_mappings) # and one to many, for cross-populating data.each do |field, field_data| values = 
field_data[:values] + zipped_values = field_data[:zipped_values] attributes[field] = if field_data.key?(:join) @@ -167,6 +174,9 @@ def mapped_line(line, line_mappings) values = values.map(&:presence) values.compact! if field_data[:compact] values.join(field_data[:join]) + elsif field_data[:zipped_values].present? + values = zipped_values.map { |value| value.split(field_data[:split_char]) } + values.first.zip(*values[1..]) else values.detect(&:present?) end diff --git a/test/mapper_test.rb b/test/mapper_test.rb index 7604376..a30fbda 100644 --- a/test/mapper_test.rb +++ b/test/mapper_test.rb @@ -61,6 +61,22 @@ def setup order: 2 YML + zip_mapping = YAML.load <<-YML + - column: zip_column1 + mappings: + - field: zipped_field + zip_order: 1 + split_char: "," + - column: zip_column2 + mappings: + - field: zipped_field + zip_order: 2 + - column: zip_column3 + mappings: + - field: zipped_field + zip_order: 3 + YML + unused_mapping = [{ 'column' => 'extra', 'rawtext_name' => 'extra' }] cross_populate_mapping = YAML.load <<-YML @@ -423,6 +439,20 @@ def setup assert_equal 'Elizabeth', line_hash[:rawtext]['forename2'] end + test 'line mapping should create valid hash with zipped fields' do + test_line = ['hello1,hello2,hello3', 'world1,world2,world3', 'suffix1,suffix2,suffix3'] + line_hash = TestMapper.new.mapped_line(test_line, zip_mapping) + + expected_mapped_value = [ + %w[hello1 world1 suffix1], %w[hello2 world2 suffix2], %w[hello3 world3 suffix3] + ] + + assert_equal expected_mapped_value, line_hash['zipped_field'] + assert_equal 'hello1,hello2,hello3', line_hash[:rawtext]['zip_column1'] + assert_equal 'world1,world2,world3', line_hash[:rawtext]['zip_column2'] + assert_equal 'suffix1,suffix2,suffix3', line_hash[:rawtext]['zip_column3'] + end + test 'line mapping should create valid hash with rawtext only' do line_hash = TestMapper.new.mapped_line(['otherinfo'], unused_mapping) assert_equal 1, line_hash.length From 40c45309bfebcf2e6c3e474833d98c906a667aa3 Mon Sep 17 
00:00:00 2001 From: ollietulloch Date: Mon, 7 Oct 2024 19:54:59 +0100 Subject: [PATCH 2/5] Documentation --- docs/yaml-mapping-user-guide.md | 1 + docs/zipped-field-mapping.md | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 docs/zipped-field-mapping.md diff --git a/docs/yaml-mapping-user-guide.md b/docs/yaml-mapping-user-guide.md index e54aa7b..ccf5d10 100644 --- a/docs/yaml-mapping-user-guide.md +++ b/docs/yaml-mapping-user-guide.md @@ -13,3 +13,4 @@ add_to_nav: true 6. [Non Tabular Mappings](non-tabular-mappings.md) 7. [Date Formats](date-formats.md) 8. [XML mappings](xml-mappings.md) +9. [Zipped Field Mapping](zipped-field-mapping.md) \ No newline at end of file diff --git a/docs/zipped-field-mapping.md b/docs/zipped-field-mapping.md new file mode 100644 index 0000000..a2e0350 --- /dev/null +++ b/docs/zipped-field-mapping.md @@ -0,0 +1,45 @@ +--- +layout: page +title: Zipped Field Mapping +permalink: /zipped-field-mapping/ +--- + +Multiple incoming columns can be mapped to a single field, zipping array values (split from strings) together. + +The mapper will identify all the columns that have been mapped to a given `field`, with their `zip_order` and `split_char`. It will then take the string value for each of these and split them using the given `split_char`, resulting in an array of arrays. 
+ +Then using the `zip_order`, it'll take the first array and zip in the remaining arrays in their `zip_order` + +Example mapping: + +--- + - column: title + mappings: + - field: zipped_field + zip_order: 1 + split_char: "," + - column: value + mappings: + - field: zipped_field + zip_order: 2 + +Example data: + +``` +"title","value" +"species,colour,legs","dog,brown,4" +``` + +This would result in: + +``` +{ "zipped_field"=>[["species", "dog"], ["colour", "brown"], ["legs", "4"]], + :rawtext=>{"title"=>"species,colour,legs", "value"=>"dog,brown,4"}} +``` + +Reversing the `zip_order` in the mapping would result in: +``` +{ "zipped_field"=>[["dog", "species""], ["brown", "colour"], ["4", "legs"]], + :rawtext=>{"title"=>"species,colour,legs", "value"=>"dog,brown,4"}} +``` + From 6828784187919222e364a06d621b29783d02193b Mon Sep 17 00:00:00 2001 From: ollietulloch Date: Tue, 19 Nov 2024 13:35:27 +0000 Subject: [PATCH 3/5] Typo --- docs/zipped-field-mapping.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zipped-field-mapping.md b/docs/zipped-field-mapping.md index a2e0350..30b201b 100644 --- a/docs/zipped-field-mapping.md +++ b/docs/zipped-field-mapping.md @@ -39,7 +39,7 @@ This would result in: Reversing the `zip_order` in the mapping would result in: ``` -{ "zipped_field"=>[["dog", "species""], ["brown", "colour"], ["4", "legs"]], +{ "zipped_field"=>[["dog", "species"], ["brown", "colour"], ["4", "legs"]], :rawtext=>{"title"=>"species,colour,legs", "value"=>"dog,brown,4"}} ``` From 58a7b926e2747c08da87395f570b17a5c77dfdba Mon Sep 17 00:00:00 2001 From: ollietulloch Date: Tue, 19 Nov 2024 14:02:45 +0000 Subject: [PATCH 4/5] Ensure enough field mapping config is present before attempting to zip --- lib/ndr_import/mapper.rb | 10 ++++++++-- test/mapper_test.rb | 27 ++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/lib/ndr_import/mapper.rb b/lib/ndr_import/mapper.rb index 3aa80ce..d72f814 100644 --- 
a/lib/ndr_import/mapper.rb +++ b/lib/ndr_import/mapper.rb @@ -151,7 +151,7 @@ def mapped_line(line, line_mappings) data[field][:values][field_mapping[Strings::ORDER] - 1] = value elsif field_mapping[Strings::PRIORITY] data[field][:values][field_mapping[Strings::PRIORITY]] = value - elsif field_mapping[Strings::ZIP_ORDER] + elsif field_zippable?(field_mapping, data[field]) data[field][:split_char] ||= field_mapping[Strings::SPLIT_CHAR] data[field][:zipped_values][field_mapping[Strings::ZIP_ORDER] - 1] = value else @@ -174,7 +174,7 @@ def mapped_line(line, line_mappings) values = values.map(&:presence) values.compact! if field_data[:compact] values.join(field_data[:join]) - elsif field_data[:zipped_values].present? + elsif zipped_values.present? values = zipped_values.map { |value| value.split(field_data[:split_char]) } values.first.zip(*values[1..]) else @@ -186,6 +186,12 @@ def mapped_line(line, line_mappings) attributes end + def field_zippable?(field_mapping, data_field) + return false if field_mapping[Strings::ZIP_ORDER].blank? + + data_field[:split_char].present? || field_mapping[Strings::SPLIT_CHAR].present? 
+ end + def mapped_value(original_value, field_mapping) if field_mapping.include?(Strings::FORMAT) begin diff --git a/test/mapper_test.rb b/test/mapper_test.rb index a30fbda..edfec1d 100644 --- a/test/mapper_test.rb +++ b/test/mapper_test.rb @@ -61,7 +61,7 @@ def setup order: 2 YML - zip_mapping = YAML.load <<-YML + zip_mapping = YAML.safe_load <<-YML - column: zip_column1 mappings: - field: zipped_field @@ -77,6 +77,21 @@ def setup zip_order: 3 YML + zip_mapping_without_split_char = YAML.safe_load <<-YML + - column: zip_column1 + mappings: + - field: zipped_field + zip_order: 1 + - column: zip_column2 + mappings: + - field: zipped_field + zip_order: 2 + - column: zip_column3 + mappings: + - field: zipped_field + zip_order: 3 + YML + unused_mapping = [{ 'column' => 'extra', 'rawtext_name' => 'extra' }] cross_populate_mapping = YAML.load <<-YML @@ -453,6 +468,16 @@ def setup assert_equal 'suffix1,suffix2,suffix3', line_hash[:rawtext]['zip_column3'] end + test 'line mapping should not attempt to zip fields without a split_char' do + test_line = ['hello1,hello2,hello3', 'world1,world2,world3', 'suffix1,suffix2,suffix3'] + line_hash = TestMapper.new.mapped_line(test_line, zip_mapping_without_split_char) + + assert_equal 'suffix1,suffix2,suffix3', line_hash['zipped_field'] + assert_equal 'hello1,hello2,hello3', line_hash[:rawtext]['zip_column1'] + assert_equal 'world1,world2,world3', line_hash[:rawtext]['zip_column2'] + assert_equal 'suffix1,suffix2,suffix3', line_hash[:rawtext]['zip_column3'] + end + test 'line mapping should create valid hash with rawtext only' do line_hash = TestMapper.new.mapped_line(['otherinfo'], unused_mapping) assert_equal 1, line_hash.length From 175df03841cfdd351d09377d242df36c355ef39b Mon Sep 17 00:00:00 2001 From: ollietulloch Date: Tue, 19 Nov 2024 14:04:33 +0000 Subject: [PATCH 5/5] Light boyscouting --- lib/ndr_import/mapper.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/ndr_import/mapper.rb 
b/lib/ndr_import/mapper.rb index d72f814..a2be28e 100644 --- a/lib/ndr_import/mapper.rb +++ b/lib/ndr_import/mapper.rb @@ -121,9 +121,10 @@ def mapped_line(line, line_mappings) rawtext[rawtext_column_name] = raw_value next unless column_mapping.key?(Strings::MAPPINGS) + column_mapping[Strings::MAPPINGS].each do |field_mapping| # create a duplicate of the raw value we can manipulate - original_value = raw_value ? raw_value.dup : nil + original_value = raw_value&.dup replace_before_mapping(original_value, field_mapping) value = mapped_value(original_value, field_mapping)