Merge pull request #13070 from zmstone/0518-improve-kafka-connection-error-logs
0518 improve kafka connection error logs
This commit is contained in:
commit
c54d25de98
|
@ -572,33 +572,54 @@ check_client_connectivity(ClientId) ->
|
||||||
{error, {find_client, Reason}}
|
{error, {find_client, Reason}}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
is_alive(Pid) ->
|
||||||
|
is_pid(Pid) andalso erlang:is_process_alive(Pid).
|
||||||
|
|
||||||
|
error_summary(Map, [Error]) ->
|
||||||
|
Map#{error => Error};
|
||||||
|
error_summary(Map, [Error | More]) ->
|
||||||
|
Map#{first_error => Error, total_errors => length(More) + 1}.
|
||||||
|
|
||||||
check_if_healthy_leaders(ClientId, ClientPid, KafkaTopic, MaxPartitions) when is_pid(ClientPid) ->
|
check_if_healthy_leaders(ClientId, ClientPid, KafkaTopic, MaxPartitions) when is_pid(ClientPid) ->
|
||||||
Leaders =
|
case wolff_client:get_leader_connections(ClientPid, KafkaTopic, MaxPartitions) of
|
||||||
case wolff_client:get_leader_connections(ClientPid, KafkaTopic, MaxPartitions) of
|
{ok, Leaders} ->
|
||||||
{ok, LeadersToCheck} ->
|
%% Kafka is considered healthy as long as any of the partition leader is reachable.
|
||||||
%% Kafka is considered healthy as long as any of the partition leader is reachable.
|
case lists:partition(fun({_Partition, Pid}) -> is_alive(Pid) end, Leaders) of
|
||||||
lists:filtermap(
|
{[], Errors} ->
|
||||||
fun({_Partition, Pid}) ->
|
throw(
|
||||||
case is_pid(Pid) andalso erlang:is_process_alive(Pid) of
|
error_summary(
|
||||||
true -> {true, Pid};
|
#{
|
||||||
_ -> false
|
cause => "no_connected_partition_leader",
|
||||||
end
|
kafka_client => ClientId,
|
||||||
end,
|
kafka_topic => KafkaTopic
|
||||||
LeadersToCheck
|
},
|
||||||
);
|
Errors
|
||||||
{error, _} ->
|
)
|
||||||
[]
|
);
|
||||||
end,
|
{_, []} ->
|
||||||
case Leaders of
|
ok;
|
||||||
[] ->
|
{_, Errors} ->
|
||||||
|
?SLOG(
|
||||||
|
warning,
|
||||||
|
"not_all_kafka_partitions_connected",
|
||||||
|
error_summary(
|
||||||
|
#{
|
||||||
|
kafka_client => ClientId,
|
||||||
|
kafka_topic => KafkaTopic
|
||||||
|
},
|
||||||
|
Errors
|
||||||
|
)
|
||||||
|
),
|
||||||
|
ok
|
||||||
|
end;
|
||||||
|
{error, Reason} ->
|
||||||
|
%% If failed to fetch metadata, wolff_client logs a warning level message
|
||||||
|
%% which includes the reason for each seed host
|
||||||
throw(#{
|
throw(#{
|
||||||
error => no_connected_partition_leader,
|
cause => Reason,
|
||||||
kafka_client => ClientId,
|
kafka_client => ClientId,
|
||||||
kafka_topic => KafkaTopic,
|
kafka_topic => KafkaTopic
|
||||||
partitions_limit => MaxPartitions
|
})
|
||||||
});
|
|
||||||
_ ->
|
|
||||||
ok
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
check_topic_status(ClientId, WolffClientPid, KafkaTopic) ->
|
check_topic_status(ClientId, WolffClientPid, KafkaTopic) ->
|
||||||
|
|
|
@ -245,7 +245,7 @@ t_license_setting_bc(_Config) ->
|
||||||
?assertMatch(#{<<"max_connections">> := 25}, request_dump()),
|
?assertMatch(#{<<"max_connections">> := 25}, request_dump()),
|
||||||
%% get
|
%% get
|
||||||
GetRes = request(get, uri(["license", "setting"]), []),
|
GetRes = request(get, uri(["license", "setting"]), []),
|
||||||
%% aslo check that the settings return correctly
|
%% also check that the settings return correctly
|
||||||
validate_setting(GetRes, <<"75%">>, <<"80%">>, 25),
|
validate_setting(GetRes, <<"75%">>, <<"80%">>, 25),
|
||||||
%% update
|
%% update
|
||||||
Low = <<"50%">>,
|
Low = <<"50%">>,
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
Improve Kafka connector error logs.
|
||||||
|
|
||||||
|
Previously, specific error details, such as unreachable advertised listeners, were not logged.
|
||||||
|
Now, error details are captured in the logs to provide more diagnostic information.
|
||||||
|
To manage log verbosity, when several errors are found only the first one is logged, accompanied by the total number of errors.
|
Loading…
Reference in New Issue