let
words1 = "together",
words2 = "to their",
simpleLikeFuzzy = (words1 as text, words2 as text) as number =>
2 * List.Count(List.Intersect({Text.ToList(words1), Text.ToList(words2)})) / (Text.Length(words1) + Text.Length(words2)),
return = simpleLikeFuzzy(words1, words2)
in
return
You can use another criteria for it like thisColumns that contains values with few characters. Even with one character difference the result may significantly be decreased.
simpleLikeFuzzy = (words1 as text, words2 as text) as number =>
List.Count(List.Intersect({Text.ToList(words1), Text.ToList(words2)}))
/ (if Text.Length(words1) > Text.Length(words2) then Text.Length(words2) else Text.Length(words1)),
You can use more a strong method. It is not optimized as isAlso, sometimes there are words completely different, they, however, share common characters.
let
words1 = "A big father waks with his own children about a green garden.",
words2 = "With nice children a big father walks about a greate garden.",
toTable = (words as text, idName as text) as any =>
let
col1 = Table.RenameColumns(Table.FromList(Text.ToList(words)), {{"Column1", "char" & idName}}),
idx = Table.AddIndexColumn(col1, idName)
in
idx,
chars1 = toTable(words1, "row"),
chars2 = toTable(words2, "col"),
joined = Table.Join(chars1, "charrow", chars2, "charcol"),
addDif = Table.AddColumn(joined, "dif", each [col] -[row]),
sorted = Table.Sort(addDif,{{"dif", Order.Ascending}, {"row", Order.Ascending}}),
addIdx = Table.AddIndexColumn(sorted, "idx"),
addGlobalDif = Table.AddColumn(addIdx, "gdif", each [row] - [idx])[[charrow], [row], [dif], [gdif]],
rankGroup = Table.Group(addGlobalDif, {"gdif", "dif"}, {{"forExpand", each [[charrow], [row]]}, {"rows", each [row]}, {"count", each List.Count([row])}}),
rankSort = Table.Sort(rankGroup,{{"count", Order.Ascending}}),
rankIndex = Table.Buffer(Table.AddIndexColumn(rankSort[[forExpand], [rows]], "idx")),
step1 = List.Transform(rankIndex[idx], (cur) =>
let
rec = rankIndex{cur},
sub = Table.SelectRows(rankIndex, each (_[idx] > cur) and ( List.Count(List.Intersect({_[rows], rec[rows]})) > 0 )),
result = if Table.RowCount(sub) > 0 then null else rec
in
result
),
step2 = Table.FromRecords(List.RemoveNulls(step1))[[forExpand]],
uniqieRows = Table.ExpandTableColumn(step2, "forExpand", {"charrow", "row"}),
sortedUniques = Table.Sort(uniqieRows,{{"row", Order.Ascending}}),
return = Text.Combine(sortedUniques[charrow])
in
return
let
words1 = "together",
words2 = "to their",
simpleLikeFuzzy = (words1 as text, words2 as text) as number =>
2 * List.Count(List.Intersect({Text.ToList(words1), Text.ToList(words2)})) / (Text.Length(words1) + Text.Length(words2)),
return = simpleLikeFuzzy(words1, words2)
in
return
AlrightIs it possible to use the simpleLikeFuzzy function in the merge process while joining two tables?
let // an etalon word table, maybe loaded from external source
defEtalon = #table(type table [value = text],
{ {"spruce"}, {"cedar"}, {"birch"}, {"pine"}, {"poplar"}, {"aspen"},
{"baobab"}, {"larch"}, {"willow"} }),
addChars = Table.AddColumn(defEtalon, "chars", each Text.ToList([value]), type list),
addLength = Table.AddColumn(addChars, "length", each Text.Length([value]), Int64.Type),
etalonTable = Table.Buffer(addLength),
// define a search function in the etalon table by the simplest fuzzy
lookup = (testText as text) as record =>
let
simpleFuzzy = (testChars as list) as record =>
let
coefficient = Table.AddColumn(etalonTable, "c", each 2 * List.Count(List.Intersect({testChars, [chars]})) / ([length] + List.Count(testChars)), type number)
in
Table.Max(coefficient, "c"),
testChars = Text.ToList(testText)
in
// define record fields from the etalon table for returning
simpleFuzzy(testChars)[[value], [c]],
// a test table for join with etalon table
testTable = #table(type table [word = text],
{ {"sprce"}, {"cidar"}, {"pane"}, {"aspern"}, {"babab"} }),
// to join them
result = Table.AddColumn(testTable, "rec", each lookup([word]), type record),
// expand returned joined records
return = Table.ExpandRecordColumn(result, "rec", {"value", "c"})
in
return