diff --git a/CHANGELOG.md b/CHANGELOG.md index 2cb3704..7ae8ca1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## [Unreleased] -* no unreleased changes * +### Added +* Column zipping functionality * ## 11.2.1 / 2024-11-18 ### Fixed diff --git a/docs/yaml-mapping-user-guide.md b/docs/yaml-mapping-user-guide.md index e54aa7b..ccf5d10 100644 --- a/docs/yaml-mapping-user-guide.md +++ b/docs/yaml-mapping-user-guide.md @@ -13,3 +13,4 @@ add_to_nav: true 6. [Non Tabular Mappings](non-tabular-mappings.md) 7. [Date Formats](date-formats.md) 8. [XML mappings](xml-mappings.md) +9. [Zipped Field Mapping](zipped-field-mapping.md) \ No newline at end of file diff --git a/docs/zipped-field-mapping.md b/docs/zipped-field-mapping.md new file mode 100644 index 0000000..30b201b --- /dev/null +++ b/docs/zipped-field-mapping.md @@ -0,0 +1,45 @@ +--- +layout: page +title: Zipped Field Mapping +permalink: /zipped-field-mapping/ +--- + +Multiple incoming columns can be mapped to a single field, zipping array values (split from strings) together. + +The mapper will identify all the columns that have been mapped to a given `field`, with their `zip_order` and `split_char`. It will then take the string value for each of these and split them using the given `split_char`, resulting in an array of arrays. 
+ +Then, using the `zip_order`, it'll take the first array and zip in the remaining arrays in their `zip_order`. The `split_char` only needs to be declared once, but it should be on the first of these columns in the mapping: a column is only zipped if it declares a `split_char` itself or an earlier column mapped to the same `field` already has. + +Example mapping: + +--- + - column: title + mappings: + - field: zipped_field + zip_order: 1 + split_char: "," + - column: value + mappings: + - field: zipped_field + zip_order: 2 + +Example data: + +``` +"title","value" +"species,colour,legs","dog,brown,4" +``` + +This would result in: + +``` +{ "zipped_field"=>[["species", "dog"], ["colour", "brown"], ["legs", "4"]], + :rawtext=>{"title"=>"species,colour,legs", "value"=>"dog,brown,4"}} +``` + +Reversing the `zip_order` in the mapping would result in: +``` +{ "zipped_field"=>[["dog", "species"], ["brown", "colour"], ["4", "legs"]], + :rawtext=>{"title"=>"species,colour,legs", "value"=>"dog,brown,4"}} +``` + diff --git a/lib/ndr_import/mapper.rb b/lib/ndr_import/mapper.rb index bec7ac4..a2be28e 100644 --- a/lib/ndr_import/mapper.rb +++ b/lib/ndr_import/mapper.rb @@ -30,6 +30,8 @@ module Strings STANDARD_MAPPING = 'standard_mapping'.freeze UNPACK_PATTERN = 'unpack_pattern'.freeze VALIDATES = 'validates'.freeze + ZIP_ORDER = 'zip_order'.freeze + SPLIT_CHAR = 'split_char'.freeze end private @@ -119,9 +121,10 @@ def mapped_line(line, line_mappings) rawtext[rawtext_column_name] = raw_value next unless column_mapping.key?(Strings::MAPPINGS) + column_mapping[Strings::MAPPINGS].each do |field_mapping| # create a duplicate of the raw value we can manipulate - original_value = raw_value ? 
raw_value.dup : nil + original_value = raw_value&.dup replace_before_mapping(original_value, field_mapping) value = mapped_value(original_value, field_mapping) @@ -137,7 +140,8 @@ def mapped_line(line, line_mappings) data[field] ||= {} data[field][:values] ||= [] # "better" values come earlier - data[field][:compact] = true unless data[field].key?(:compact) + data[field][:zipped_values] ||= [] + data[field][:compact] = true unless data[field].key?(:compact) if field_mapping[Strings::ORDER] data[field][:join] ||= field_mapping[Strings::JOIN] @@ -148,6 +152,9 @@ def mapped_line(line, line_mappings) data[field][:values][field_mapping[Strings::ORDER] - 1] = value elsif field_mapping[Strings::PRIORITY] data[field][:values][field_mapping[Strings::PRIORITY]] = value + elsif field_zippable?(field_mapping, data[field]) + data[field][:split_char] ||= field_mapping[Strings::SPLIT_CHAR] + data[field][:zipped_values][field_mapping[Strings::ZIP_ORDER] - 1] = value else data[field][:values].unshift(value) # new "best" value end @@ -160,6 +167,7 @@ def mapped_line(line, line_mappings) # and one to many, for cross-populating data.each do |field, field_data| values = field_data[:values] + zipped_values = field_data[:zipped_values] attributes[field] = if field_data.key?(:join) @@ -167,6 +175,9 @@ def mapped_line(line, line_mappings) values = values.map(&:presence) values.compact! if field_data[:compact] values.join(field_data[:join]) + elsif zipped_values.present? + values = zipped_values.map { |value| value.to_s.split(field_data[:split_char]) } # zip_order gaps are nil + values.first.zip(*values[1..]) else values.detect(&:present?) end @@ -176,6 +187,12 @@ def mapped_line(line, line_mappings) attributes end + def field_zippable?(field_mapping, data_field) + return false if field_mapping[Strings::ZIP_ORDER].blank? + + data_field[:split_char].present? || field_mapping[Strings::SPLIT_CHAR].present? 
+ end + def mapped_value(original_value, field_mapping) if field_mapping.include?(Strings::FORMAT) begin diff --git a/test/mapper_test.rb b/test/mapper_test.rb index 7604376..edfec1d 100644 --- a/test/mapper_test.rb +++ b/test/mapper_test.rb @@ -61,6 +61,37 @@ def setup order: 2 YML + zip_mapping = YAML.safe_load <<-YML + - column: zip_column1 + mappings: + - field: zipped_field + zip_order: 1 + split_char: "," + - column: zip_column2 + mappings: + - field: zipped_field + zip_order: 2 + - column: zip_column3 + mappings: + - field: zipped_field + zip_order: 3 + YML + + zip_mapping_without_split_char = YAML.safe_load <<-YML + - column: zip_column1 + mappings: + - field: zipped_field + zip_order: 1 + - column: zip_column2 + mappings: + - field: zipped_field + zip_order: 2 + - column: zip_column3 + mappings: + - field: zipped_field + zip_order: 3 + YML + unused_mapping = [{ 'column' => 'extra', 'rawtext_name' => 'extra' }] cross_populate_mapping = YAML.load <<-YML @@ -423,6 +454,30 @@ def setup assert_equal 'Elizabeth', line_hash[:rawtext]['forename2'] end + test 'line mapping should create valid hash with zipped fields' do + test_line = ['hello1,hello2,hello3', 'world1,world2,world3', 'suffix1,suffix2,suffix3'] + line_hash = TestMapper.new.mapped_line(test_line, zip_mapping) + + expected_mapped_value = [ + %w[hello1 world1 suffix1], %w[hello2 world2 suffix2], %w[hello3 world3 suffix3] + ] + + assert_equal expected_mapped_value, line_hash['zipped_field'] + assert_equal 'hello1,hello2,hello3', line_hash[:rawtext]['zip_column1'] + assert_equal 'world1,world2,world3', line_hash[:rawtext]['zip_column2'] + assert_equal 'suffix1,suffix2,suffix3', line_hash[:rawtext]['zip_column3'] + end + + test 'line mapping should not attempt to zip fields without a split_char' do + test_line = ['hello1,hello2,hello3', 'world1,world2,world3', 'suffix1,suffix2,suffix3'] + line_hash = TestMapper.new.mapped_line(test_line, zip_mapping_without_split_char) + + assert_equal 
'suffix1,suffix2,suffix3', line_hash['zipped_field'] + assert_equal 'hello1,hello2,hello3', line_hash[:rawtext]['zip_column1'] + assert_equal 'world1,world2,world3', line_hash[:rawtext]['zip_column2'] + assert_equal 'suffix1,suffix2,suffix3', line_hash[:rawtext]['zip_column3'] + end + test 'line mapping should create valid hash with rawtext only' do line_hash = TestMapper.new.mapped_line(['otherinfo'], unused_mapping) assert_equal 1, line_hash.length