Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## [Unreleased]
* no unreleased changes *
### Added
* Column zipping functionality

## 11.2.1 / 2024-11-18
### Fixed
Expand Down
1 change: 1 addition & 0 deletions docs/yaml-mapping-user-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ add_to_nav: true
6. [Non Tabular Mappings](non-tabular-mappings.md)
7. [Date Formats](date-formats.md)
8. [XML mappings](xml-mappings.md)
9. [Zipped Field Mapping](zipped-field-mapping.md)
45 changes: 45 additions & 0 deletions docs/zipped-field-mapping.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
layout: page
title: Zipped Field Mapping
permalink: /zipped-field-mapping/
---

Multiple incoming columns can be mapped to a single field, zipping array values (split from strings) together.

The mapper will identify all the columns that have been mapped to a given `field`, with their `zip_order` and `split_char`. It will then take the string value for each of these and split them using the given `split_char`, resulting in an array of arrays.

Then, using the `zip_order`, it takes the first array and zips the remaining arrays into it, in ascending `zip_order`.

Example mapping:

---
- column: title
mappings:
- field: zipped_field
zip_order: 1
split_char: ","
- column: value
mappings:
- field: zipped_field
zip_order: 2

Example data:

```
"title","value"
"species,colour,legs","dog,brown,4"
```

This would result in:

```
{ "zipped_field"=>[["species", "dog"], ["colour", "brown"], ["legs", "4"]],
:rawtext=>{"title"=>"species,colour,legs", "value"=>"dog,brown,4"}}
```

Reversing the `zip_order` in the mapping would result in:
```
{ "zipped_field"=>[["dog", "species"], ["brown", "colour"], ["4", "legs"]],
:rawtext=>{"title"=>"species,colour,legs", "value"=>"dog,brown,4"}}
```

21 changes: 19 additions & 2 deletions lib/ndr_import/mapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ module Strings
STANDARD_MAPPING = 'standard_mapping'.freeze
UNPACK_PATTERN = 'unpack_pattern'.freeze
VALIDATES = 'validates'.freeze
ZIP_ORDER = 'zip_order'.freeze
SPLIT_CHAR = 'split_char'.freeze
end

private
Expand Down Expand Up @@ -119,9 +121,10 @@ def mapped_line(line, line_mappings)
rawtext[rawtext_column_name] = raw_value

next unless column_mapping.key?(Strings::MAPPINGS)

column_mapping[Strings::MAPPINGS].each do |field_mapping|
# create a duplicate of the raw value we can manipulate
original_value = raw_value ? raw_value.dup : nil
original_value = raw_value&.dup

replace_before_mapping(original_value, field_mapping)
value = mapped_value(original_value, field_mapping)
Expand All @@ -137,7 +140,8 @@ def mapped_line(line, line_mappings)

data[field] ||= {}
data[field][:values] ||= [] # "better" values come earlier
data[field][:compact] = true unless data[field].key?(:compact)
data[field][:zipped_values] ||= []
data[field][:compact] = true unless data[field].key?(:compact)

if field_mapping[Strings::ORDER]
data[field][:join] ||= field_mapping[Strings::JOIN]
Expand All @@ -148,6 +152,9 @@ def mapped_line(line, line_mappings)
data[field][:values][field_mapping[Strings::ORDER] - 1] = value
elsif field_mapping[Strings::PRIORITY]
data[field][:values][field_mapping[Strings::PRIORITY]] = value
elsif field_zippable?(field_mapping, data[field])
data[field][:split_char] ||= field_mapping[Strings::SPLIT_CHAR]
data[field][:zipped_values][field_mapping[Strings::ZIP_ORDER] - 1] = value
else
data[field][:values].unshift(value) # new "best" value
end
Expand All @@ -160,13 +167,17 @@ def mapped_line(line, line_mappings)
# and one to many, for cross-populating
data.each do |field, field_data|
values = field_data[:values]
zipped_values = field_data[:zipped_values]

attributes[field] =
if field_data.key?(:join)
# Map "blank" values to nil:
values = values.map(&:presence)
values.compact! if field_data[:compact]
values.join(field_data[:join])
elsif zipped_values.present?
values = zipped_values.map { |value| value.split(field_data[:split_char]) }
values.first.zip(*values[1..])
else
values.detect(&:present?)
end
Expand All @@ -176,6 +187,12 @@ def mapped_line(line, line_mappings)
attributes
end

# Decides whether a field mapping should contribute to a zipped field.
#
# A mapping is zippable when it declares a `zip_order` AND a split character
# is known for the field: either one already recorded in +data_field+, or one
# declared on this mapping itself.
#
# Whitespace-only separators (e.g. " " or "\t") count as declared. `present?`
# alone treats such strings as blank, which would silently disable zipping for
# space/tab delimited values, so non-empty strings are accepted explicitly.
#
# field_mapping - Hash describing a single field mapping (string keys).
# data_field    - Hash of the data accumulated so far for the target field.
#
# Returns true or false.
def field_zippable?(field_mapping, data_field)
  return false if field_mapping[Strings::ZIP_ORDER].blank?

  [data_field[:split_char], field_mapping[Strings::SPLIT_CHAR]].any? do |split_char|
    split_char.present? || (split_char.is_a?(String) && !split_char.empty?)
  end
end

def mapped_value(original_value, field_mapping)
if field_mapping.include?(Strings::FORMAT)
begin
Expand Down
55 changes: 55 additions & 0 deletions test/mapper_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,37 @@ def setup
order: 2
YML

# Fixture: three columns all mapped to the same `zipped_field`, with
# zip_order 1, 2 and 3 respectively. Only the first mapping declares a
# split_char (","); the other two declare none.
zip_mapping = YAML.safe_load <<-YML
- column: zip_column1
mappings:
- field: zipped_field
zip_order: 1
split_char: ","
- column: zip_column2
mappings:
- field: zipped_field
zip_order: 2
- column: zip_column3
mappings:
- field: zipped_field
zip_order: 3
YML

# Fixture: the same three columns and zip_order values as the zipped mapping,
# but with no split_char on any of them.
zip_mapping_without_split_char = YAML.safe_load <<-YML
- column: zip_column1
mappings:
- field: zipped_field
zip_order: 1
- column: zip_column2
mappings:
- field: zipped_field
zip_order: 2
- column: zip_column3
mappings:
- field: zipped_field
zip_order: 3
YML

unused_mapping = [{ 'column' => 'extra', 'rawtext_name' => 'extra' }]

cross_populate_mapping = YAML.load <<-YML
Expand Down Expand Up @@ -423,6 +454,30 @@ def setup
assert_equal 'Elizabeth', line_hash[:rawtext]['forename2']
end

test 'line mapping should create valid hash with zipped fields' do
  # Raw comma-separated value for each column, in column order.
  raw_by_column = {
    'zip_column1' => 'hello1,hello2,hello3',
    'zip_column2' => 'world1,world2,world3',
    'zip_column3' => 'suffix1,suffix2,suffix3'
  }

  mapped = TestMapper.new.mapped_line(raw_by_column.values, zip_mapping)

  # The nth element of every column should land together in the nth tuple.
  zipped = [
    %w[hello1 world1 suffix1],
    %w[hello2 world2 suffix2],
    %w[hello3 world3 suffix3]
  ]
  assert_equal zipped, mapped['zipped_field']

  # The unsplit strings must still be kept as rawtext.
  raw_by_column.each do |column, raw|
    assert_equal raw, mapped[:rawtext][column]
  end
end

test 'line mapping should not attempt to zip fields without a split_char' do
  # Raw comma-separated value for each column, in column order.
  raw_by_column = {
    'zip_column1' => 'hello1,hello2,hello3',
    'zip_column2' => 'world1,world2,world3',
    'zip_column3' => 'suffix1,suffix2,suffix3'
  }

  mapped = TestMapper.new.mapped_line(raw_by_column.values, zip_mapping_without_split_char)

  # With no split_char the field holds a plain string (the third column's
  # raw value), not an array of zipped tuples.
  assert_equal 'suffix1,suffix2,suffix3', mapped['zipped_field']

  # Rawtext is recorded for every column regardless.
  raw_by_column.each do |column, raw|
    assert_equal raw, mapped[:rawtext][column]
  end
end

test 'line mapping should create valid hash with rawtext only' do
line_hash = TestMapper.new.mapped_line(['otherinfo'], unused_mapping)
assert_equal 1, line_hash.length
Expand Down