// Copyright 2023 LiveKit, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package livekit.proto;
option csharp_namespace = "LiveKit.Proto";

// import "handle.proto";
import "e2ee.proto";
import "track.proto";
import "room.proto";
import "video_frame.proto";
import "audio_frame.proto";

// **How livekit-ffi works:**
// We refer to the Rust server that runs the LiveKit client implementation as the ffi server, and we
// refer to the foreign language that communicates with the ffi server as the ffi client.
// (e.g. Python SDK, Unity SDK, etc...)
//
// We expose the Rust client implementation of livekit using the protocol defined here.
// Everything starts with a FfiRequest, which is a oneof message that contains all the possible
// requests that can be made to the ffi server.
// The server will then respond with a FfiResponse, which is also a oneof message that contains
// all the possible responses.
// The first request sent to the server must be an InitializeRequest, which contains a pointer
// to the callback function that will be used to send events and async responses to the ffi client.
// (e.g. participant joined, track published, etc...)
//
// **Useful things to know when collaborating on the protocol:**
// Everything is subject to discussion and change :-)
//
// - The ffi client implementation must never forget to correctly dispose all the owned handles
//   that it receives from the server.
//
//   Therefore, the ffi client is easier to implement if there are fewer handles to manage.
//
// - We are mainly using FfiHandle on info messages (e.g: RoomInfo, TrackInfo, etc...)
//   For this reason, info messages are only sent once, at creation (we're not using them for
//   updates; we can infer them from events on the client implementation).
//   e.g: set speaking to true when we receive an ActiveSpeakerChanged event.

// This is the input of the livekit_ffi_request function.
// We always expect a response (FfiResponse, even if it's empty).
//
// Exactly one variant of the `message` oneof is meant to be set per request.
// NOTE(review): field numbers 1, 15, 20 and 21 are not assigned in this oneof;
// if they were used by earlier revisions, consider marking them `reserved`.
message FfiRequest {
  oneof message {
    DisposeRequest dispose = 2;

    // Room
    ConnectRequest connect = 3;
    DisconnectRequest disconnect = 4;
    PublishTrackRequest publish_track = 5;
    UnpublishTrackRequest unpublish_track = 6;
    PublishDataRequest publish_data = 7;
    SetSubscribedRequest set_subscribed = 8;
    UpdateLocalMetadataRequest update_local_metadata = 9;
    UpdateLocalNameRequest update_local_name = 10;
    GetSessionStatsRequest get_session_stats = 11;

    // Track
    CreateVideoTrackRequest create_video_track = 12;
    CreateAudioTrackRequest create_audio_track = 13;
    GetStatsRequest get_stats = 14;

    // Video
    NewVideoStreamRequest new_video_stream = 16;
    NewVideoSourceRequest new_video_source = 17;
    CaptureVideoFrameRequest capture_video_frame = 18;
    VideoConvertRequest video_convert = 19;

    // Audio
    NewAudioStreamRequest new_audio_stream = 22;
    NewAudioSourceRequest new_audio_source = 23;
    CaptureAudioFrameRequest capture_audio_frame = 24;
    NewAudioResamplerRequest new_audio_resampler = 25;
    RemixAndResampleRequest remix_and_resample = 26;

    // E2EE
    E2eeRequest e2ee = 27;
  }
}

// This is the output of livekit_ffi_request function.
// Each variant below uses the same field name and field number as the
// corresponding FfiRequest variant.
message FfiResponse {
  oneof message {
    DisposeResponse dispose = 2;

    // Room
    ConnectResponse connect = 3;
    DisconnectResponse disconnect = 4;
    PublishTrackResponse publish_track = 5;
    UnpublishTrackResponse unpublish_track = 6;
    PublishDataResponse publish_data = 7;
    SetSubscribedResponse set_subscribed = 8;
    UpdateLocalMetadataResponse update_local_metadata = 9;
    UpdateLocalNameResponse update_local_name = 10;
    GetSessionStatsResponse get_session_stats = 11;

    // Track
    CreateVideoTrackResponse create_video_track = 12;
    CreateAudioTrackResponse create_audio_track = 13;
    GetStatsResponse get_stats = 14;

    // Video
    NewVideoStreamResponse new_video_stream = 16;
    NewVideoSourceResponse new_video_source = 17;
    CaptureVideoFrameResponse capture_video_frame = 18;
    VideoConvertResponse video_convert = 19;

    // Audio
    NewAudioStreamResponse new_audio_stream = 22;
    NewAudioSourceResponse new_audio_source = 23;
    CaptureAudioFrameResponse capture_audio_frame = 24;
    NewAudioResamplerResponse new_audio_resampler = 25;
    RemixAndResampleResponse remix_and_resample = 26;

    // E2EE
    E2eeResponse e2ee = 27;
  }
}

// To minimize complexity, participant events are not included in the protocol.
// They are easily deducible from the room events, and it turned out that this
// is easier to implement on the ffi client side.
message FfiEvent {
  oneof message {
    // Event-typed variants.
    RoomEvent room_event = 1;
    TrackEvent track_event = 2;
    VideoStreamEvent video_stream_event = 3;
    AudioStreamEvent audio_stream_event = 4;

    // Callback-typed variants.
    ConnectCallback connect = 5;
    DisconnectCallback disconnect = 6;
    DisposeCallback dispose = 7;
    PublishTrackCallback publish_track = 8;
    UnpublishTrackCallback unpublish_track = 9;
    PublishDataCallback publish_data = 10;
    CaptureAudioFrameCallback capture_audio_frame = 11;
    UpdateLocalMetadataCallback update_local_metadata = 12;
    UpdateLocalNameCallback update_local_name = 13;
    GetStatsCallback get_stats = 14;

    // Batched log records (see LogBatch).
    LogBatch logs = 15;

    GetSessionStatsCallback get_session_stats = 16;

    // Panic report carrying a message string (see Panic).
    Panic panic = 17;
  }
}

// Stop all rooms synchronously (Do we need async here?).
// e.g: This is used for the Unity Editor after each assembly reload.
// TODO(theomonnom): Implement a debug mode where we can find all leaked handles?
message DisposeRequest {
  // NOTE(review): presumably selects asynchronous disposal, since
  // DisposeResponse.async_id is documented as "None if sync" — confirm.
  // `async` is a keyword in several target languages, so generated accessors
  // may be escaped or awkward there.
  bool async = 1;
}

// Response to a DisposeRequest.
message DisposeResponse {
  // None if sync.
  optional uint64 async_id = 1;
}

// Delivered through FfiEvent.dispose.
message DisposeCallback {
  // NOTE(review): presumably the async_id returned in DisposeResponse — confirm.
  uint64 async_id = 1;
}

// Level carried by a LogRecord.
// LOG_ERROR is the zero value, so it is also the proto3 default for an unset
// `level` field.
enum LogLevel {
  LOG_ERROR = 0;
  LOG_WARN = 1;
  LOG_INFO = 2;
  LOG_DEBUG = 3;
  LOG_TRACE = 4;
}

// A single log entry.
message LogRecord {
  LogLevel level = 1;

  // e.g "livekit", "libwebrtc", "tokio-tungstenite", etc...
  string target = 2;

  // The three fields below have explicit presence and may be absent.
  optional string module_path = 3;
  optional string file = 4;
  optional uint32 line = 5;

  string message = 6;
}

// A group of log records, carried by FfiEvent.logs.
message LogBatch {
  repeated LogRecord records = 1;
}

// Carried by FfiEvent.panic.
message Panic {
  string message = 1;
}

// TODO(theomonnom): Debug messages (Print handles).