perf(ft-asm): express coverage through shortest path on graph
Coverage becomes the shortest path problem on graph where nodes are offsets and edges are nodes' segments. Implement a simple Dijkstra algorithm to find one.
This commit is contained in:
parent
0c84fc28b0
commit
130601376a
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
-export([to_list/1]).
|
-export([to_list/1]).
|
||||||
-export([from_list/1]).
|
-export([from_list/1]).
|
||||||
|
-export([define/2]).
|
||||||
-export([apply/2]).
|
-export([apply/2]).
|
||||||
|
|
||||||
-spec to_list(maybe(A)) -> [A].
|
-spec to_list(maybe(A)) -> [A].
|
||||||
|
@ -34,6 +35,12 @@ from_list([]) ->
|
||||||
from_list([Term]) ->
|
from_list([Term]) ->
|
||||||
Term.
|
Term.
|
||||||
|
|
||||||
|
-spec define(maybe(A), B) -> A | B.
|
||||||
|
define(undefined, Term) ->
|
||||||
|
Term;
|
||||||
|
define(Term, _) ->
|
||||||
|
Term.
|
||||||
|
|
||||||
%% @doc Apply a function to a maybe argument.
|
%% @doc Apply a function to a maybe argument.
|
||||||
-spec apply(fun((maybe(A)) -> maybe(A)), maybe(A)) ->
|
-spec apply(fun((maybe(A)) -> maybe(A)), maybe(A)) ->
|
||||||
maybe(A).
|
maybe(A).
|
||||||
|
@ -60,6 +67,13 @@ from_list_test_() ->
|
||||||
?_assertError(_, from_list([1, 2, 3]))
|
?_assertError(_, from_list([1, 2, 3]))
|
||||||
].
|
].
|
||||||
|
|
||||||
|
define_test_() ->
|
||||||
|
[
|
||||||
|
?_assertEqual(42, define(42, undefined)),
|
||||||
|
?_assertEqual(<<"default">>, define(undefined, <<"default">>)),
|
||||||
|
?_assertEqual(undefined, define(undefined, undefined))
|
||||||
|
].
|
||||||
|
|
||||||
apply_test_() ->
|
apply_test_() ->
|
||||||
[
|
[
|
||||||
?_assertEqual(<<"42">>, ?MODULE:apply(fun erlang:integer_to_binary/1, 42)),
|
?_assertEqual(<<"42">>, ?MODULE:apply(fun erlang:integer_to_binary/1, 42)),
|
||||||
|
|
|
@ -123,7 +123,7 @@ status(meta, []) ->
|
||||||
status(meta, [_M1, _M2 | _] = Metas) ->
|
status(meta, [_M1, _M2 | _] = Metas) ->
|
||||||
{error, {inconsistent, [Frag#{node => Node} || {_, {Node, Frag}} <- Metas]}};
|
{error, {inconsistent, [Frag#{node => Node} || {_, {Node, Frag}} <- Metas]}};
|
||||||
status(coverage, #asm{segs = Segments, size = Size}) ->
|
status(coverage, #asm{segs = Segments, size = Size}) ->
|
||||||
case coverage(squash(Segments), Size) of
|
case coverage(Segments, Size) of
|
||||||
Coverage when is_list(Coverage) ->
|
Coverage when is_list(Coverage) ->
|
||||||
{complete, Coverage, #{
|
{complete, Coverage, #{
|
||||||
dominant => dominant(Coverage)
|
dominant => dominant(Coverage)
|
||||||
|
@ -145,8 +145,7 @@ append_segmentinfo(Asm, Node, Fragment = #{fragment := {segment, Info}}) ->
|
||||||
Offset = maps:get(offset, Info),
|
Offset = maps:get(offset, Info),
|
||||||
Size = maps:get(size, Info),
|
Size = maps:get(size, Info),
|
||||||
End = Offset + Size,
|
End = Offset + Size,
|
||||||
Index = {Offset, locality(Node), -End, Node},
|
Segs = add_edge(Asm#asm.segs, Offset, End, locality(Node) * Size, {Node, Fragment}),
|
||||||
Segs = insert(Asm#asm.segs, Index, [Fragment]),
|
|
||||||
Asm#asm{
|
Asm#asm{
|
||||||
% TODO
|
% TODO
|
||||||
% In theory it's possible to have two segments with same offset + size on
|
% In theory it's possible to have two segments with same offset + size on
|
||||||
|
@ -155,69 +154,143 @@ append_segmentinfo(Asm, Node, Fragment = #{fragment := {segment, Info}}) ->
|
||||||
segs = Segs
|
segs = Segs
|
||||||
}.
|
}.
|
||||||
|
|
||||||
squash(Segs) ->
|
|
||||||
% NOTE
|
|
||||||
% Here we're "compressing" information about every known segment by adjoining
|
|
||||||
% nearby segments on the same node into "runs".
|
|
||||||
squash(Segs, gb_trees:next(gb_trees:iterator(Segs))).
|
|
||||||
|
|
||||||
squash(Segs, {Index = {Offset, Locality, MEnd, _}, Fragments, It}) ->
|
|
||||||
SegsSquashed = squash_run(gb_trees:delete(Index, Segs), Index, Fragments, It),
|
|
||||||
ItNext = gb_trees:iterator_from({Offset, Locality, MEnd + 1, 0}, SegsSquashed),
|
|
||||||
squash(SegsSquashed, gb_trees:next(ItNext));
|
|
||||||
squash(Segs, none) ->
|
|
||||||
Segs.
|
|
||||||
|
|
||||||
squash_run(Segs, {Offset, Locality, MEnd, Node} = Index, Fragments, It) ->
|
|
||||||
Next = gb_trees:next(It),
|
|
||||||
case Next of
|
|
||||||
{{OffsetNext, _, MEndNext, Node} = IndexNext, FragmentsNext, ItNext} when
|
|
||||||
OffsetNext == -MEnd
|
|
||||||
->
|
|
||||||
SegsNext = gb_trees:delete(IndexNext, Segs),
|
|
||||||
IndexSquashed = {Offset, Locality, MEndNext, Node},
|
|
||||||
squash_run(SegsNext, IndexSquashed, Fragments ++ FragmentsNext, ItNext);
|
|
||||||
{{OffsetNext, _, _, _}, _, ItNext} when OffsetNext =< -MEnd ->
|
|
||||||
squash_run(Segs, Index, Fragments, ItNext);
|
|
||||||
_ ->
|
|
||||||
insert(Segs, Index, Fragments)
|
|
||||||
end.
|
|
||||||
|
|
||||||
insert(Segs, Index, Fragments) ->
|
|
||||||
try
|
|
||||||
gb_trees:insert(Index, Fragments, Segs)
|
|
||||||
catch
|
|
||||||
error:{key_exists, _} -> Segs
|
|
||||||
end.
|
|
||||||
|
|
||||||
coverage(Segs, Size) ->
|
coverage(Segs, Size) ->
|
||||||
coverage(gb_trees:next(gb_trees:iterator(Segs)), 0, Size).
|
find_shortest_path(Segs, 0, Size).
|
||||||
|
|
||||||
coverage({{Offset, _, _, _}, _Fragments, It}, Cursor, Sz) when Offset < Cursor ->
|
find_shortest_path(G1, From, To) ->
|
||||||
coverage(gb_trees:next(It), Cursor, Sz);
|
|
||||||
coverage({{Cursor, _Locality, MEnd, Node}, Fragments, It}, Cursor, Sz) ->
|
|
||||||
% NOTE
|
% NOTE
|
||||||
% We consider only whole fragments here, so for example from the point of view of
|
% This is a Dijkstra shortest path algorithm implemented on top of `gb_trees`.
|
||||||
% this algo `[{Offset1 = 0, Size1 = 15}, {Offset2 = 10, Size2 = 10}]` has no
|
% It is one-way right now, for simplicity sake.
|
||||||
% coverage.
|
G2 = set_cost(G1, From, 0, []),
|
||||||
ItNext = gb_trees:next(It),
|
case find_shortest_path(G2, From, 0, To) of
|
||||||
case coverage(ItNext, -MEnd, Sz) of
|
{found, G3} ->
|
||||||
Coverage when is_list(Coverage) ->
|
construct_path(G3, From, To, []);
|
||||||
[{Node, Frag} || Frag <- Fragments] ++ Coverage;
|
{error, Last} ->
|
||||||
Missing = {missing, _} ->
|
% NOTE: this is actually just an estimation of what is missing.
|
||||||
case coverage(ItNext, Cursor, Sz) of
|
{missing, {segment, Last, emqx_maybe:define(find_successor(G2, Last), To)}}
|
||||||
CoverageAlt when is_list(CoverageAlt) ->
|
end.
|
||||||
CoverageAlt;
|
|
||||||
{missing, _} ->
|
find_shortest_path(G1, Node, Cost, Target) ->
|
||||||
Missing
|
Edges = get_edges(G1, Node),
|
||||||
|
G2 = update_neighbours(G1, Node, Cost, Edges),
|
||||||
|
case take_queued(G2) of
|
||||||
|
{Target, _NextCost, G3} ->
|
||||||
|
{found, G3};
|
||||||
|
{Next, NextCost, G3} ->
|
||||||
|
find_shortest_path(G3, Next, NextCost, Target);
|
||||||
|
none ->
|
||||||
|
{error, Node}
|
||||||
|
end.
|
||||||
|
|
||||||
|
construct_path(_G, From, From, Acc) ->
|
||||||
|
Acc;
|
||||||
|
construct_path(G, From, To, Acc) ->
|
||||||
|
{Prev, Label} = get_label(G, To),
|
||||||
|
construct_path(G, From, Prev, [Label | Acc]).
|
||||||
|
|
||||||
|
update_neighbours(G1, Node, NodeCost, Edges) ->
|
||||||
|
lists:foldl(
|
||||||
|
fun({Neighbour, Weight, Label}, GAcc) ->
|
||||||
|
case is_visited(GAcc, Neighbour) of
|
||||||
|
false ->
|
||||||
|
NeighCost = NodeCost + Weight,
|
||||||
|
CurrentCost = get_cost(GAcc, Neighbour),
|
||||||
|
case NeighCost < CurrentCost of
|
||||||
|
true ->
|
||||||
|
set_cost(GAcc, Neighbour, NeighCost, {Node, Label});
|
||||||
|
false ->
|
||||||
|
GAcc
|
||||||
|
end;
|
||||||
|
true ->
|
||||||
|
GAcc
|
||||||
end
|
end
|
||||||
end;
|
end,
|
||||||
coverage({{Offset, _, _MEnd, _}, _Fragments, _It}, Cursor, _Sz) when Offset > Cursor ->
|
G1,
|
||||||
{missing, {segment, Cursor, Offset}};
|
Edges
|
||||||
coverage(none, Cursor, Sz) when Cursor < Sz ->
|
).
|
||||||
{missing, {segment, Cursor, Sz}};
|
|
||||||
coverage(none, Cursor, Cursor) ->
|
add_edge(G, Node, ToNode, WeightIn, EdgeLabel) ->
|
||||||
[].
|
Edges = tree_lookup({Node}, G, []),
|
||||||
|
case lists:keyfind(ToNode, 1, Edges) of
|
||||||
|
{ToNode, Weight, _} when Weight =< WeightIn ->
|
||||||
|
% NOTE
|
||||||
|
% Discarding any edges with higher weight here. This is fine as long as we
|
||||||
|
% optimize for locality.
|
||||||
|
G;
|
||||||
|
_ ->
|
||||||
|
EdgesNext = lists:keystore(ToNode, 1, Edges, {ToNode, WeightIn, EdgeLabel}),
|
||||||
|
tree_update({Node}, EdgesNext, G)
|
||||||
|
end.
|
||||||
|
|
||||||
|
get_edges(G, Node) ->
|
||||||
|
tree_lookup({Node}, G, []).
|
||||||
|
|
||||||
|
get_cost(G, Node) ->
|
||||||
|
tree_lookup({Node, cost}, G, inf).
|
||||||
|
|
||||||
|
get_label(G, Node) ->
|
||||||
|
gb_trees:get({Node, label}, G).
|
||||||
|
|
||||||
|
set_cost(G1, Node, Cost, Label) ->
|
||||||
|
G3 =
|
||||||
|
case tree_lookup({Node, cost}, G1, inf) of
|
||||||
|
CostWas when CostWas /= inf ->
|
||||||
|
{true, G2} = gb_trees:take({queued, CostWas, Node}, G1),
|
||||||
|
tree_update({queued, Cost, Node}, true, G2);
|
||||||
|
inf ->
|
||||||
|
tree_update({queued, Cost, Node}, true, G1)
|
||||||
|
end,
|
||||||
|
G4 = tree_update({Node, cost}, Cost, G3),
|
||||||
|
G5 = tree_update({Node, label}, Label, G4),
|
||||||
|
G5.
|
||||||
|
|
||||||
|
take_queued(G1) ->
|
||||||
|
It = gb_trees:iterator_from({queued, 0, 0}, G1),
|
||||||
|
case gb_trees:next(It) of
|
||||||
|
{{queued, Cost, Node} = Index, true, _It} ->
|
||||||
|
{Node, Cost, gb_trees:delete(Index, G1)};
|
||||||
|
_ ->
|
||||||
|
none
|
||||||
|
end.
|
||||||
|
|
||||||
|
is_visited(G, Node) ->
|
||||||
|
case tree_lookup({Node, cost}, G, inf) of
|
||||||
|
inf ->
|
||||||
|
false;
|
||||||
|
Cost ->
|
||||||
|
not tree_lookup({queued, Cost, Node}, G, false)
|
||||||
|
end.
|
||||||
|
|
||||||
|
find_successor(G, Node) ->
|
||||||
|
case gb_trees:next(gb_trees:iterator_from({Node}, G)) of
|
||||||
|
{{Node}, _, It} ->
|
||||||
|
case gb_trees:next(It) of
|
||||||
|
{{Successor}, _, _} ->
|
||||||
|
Successor;
|
||||||
|
_ ->
|
||||||
|
undefined
|
||||||
|
end;
|
||||||
|
{{Successor}, _, _} ->
|
||||||
|
Successor;
|
||||||
|
_ ->
|
||||||
|
undefined
|
||||||
|
end.
|
||||||
|
|
||||||
|
tree_lookup(Index, Tree, Default) ->
|
||||||
|
case gb_trees:lookup(Index, Tree) of
|
||||||
|
{value, V} ->
|
||||||
|
V;
|
||||||
|
none ->
|
||||||
|
Default
|
||||||
|
end.
|
||||||
|
|
||||||
|
tree_update(Index, Value, Tree) ->
|
||||||
|
case gb_trees:take_any(Index, Tree) of
|
||||||
|
{_, TreeNext} ->
|
||||||
|
gb_trees:insert(Index, Value, TreeNext);
|
||||||
|
error ->
|
||||||
|
gb_trees:insert(Index, Value, Tree)
|
||||||
|
end.
|
||||||
|
|
||||||
dominant(Coverage) ->
|
dominant(Coverage) ->
|
||||||
% TODO: needs improvement, better defined _dominance_, maybe some score
|
% TODO: needs improvement, better defined _dominance_, maybe some score
|
||||||
|
@ -379,8 +452,7 @@ missing_coverage_test() ->
|
||||||
],
|
],
|
||||||
Asm = append_many(new(100), Segs),
|
Asm = append_many(new(100), Segs),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
% {incomplete, {missing, {segment, 30, 40}}}, ???
|
{incomplete, {missing, {segment, 30, 40}}},
|
||||||
{incomplete, {missing, {segment, 20, 40}}},
|
|
||||||
status(coverage, Asm)
|
status(coverage, Asm)
|
||||||
).
|
).
|
||||||
|
|
||||||
|
@ -407,7 +479,7 @@ missing_coverage_with_redudancy_test() ->
|
||||||
Asm = append_many(new(100), Segs),
|
Asm = append_many(new(100), Segs),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
% {incomplete, {missing, {segment, 50, 60}}}, ???
|
% {incomplete, {missing, {segment, 50, 60}}}, ???
|
||||||
{incomplete, {missing, {segment, 20, 40}}},
|
{incomplete, {missing, {segment, 60, 100}}},
|
||||||
status(coverage, Asm)
|
status(coverage, Asm)
|
||||||
).
|
).
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue