diff --git a/.gitignore b/.gitignore index de1a3ea..d7b0498 100755 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ /target *.db* .env -/videos almond.toml + +# almond media folders +/media +/channels +/videos diff --git a/Cargo.lock b/Cargo.lock index 827e832..3f2b734 100755 --- a/Cargo.lock +++ b/Cargo.lock @@ -820,9 +820,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.171" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libm" @@ -1414,9 +1414,9 @@ dependencies = [ [[package]] name = "sqlx" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14e22987355fbf8cfb813a0cf8cd97b1b4ec834b94dbd759a9e8679d41fabe83" +checksum = "f3c3a85280daca669cfd3bcb68a337882a8bc57ec882f72c5d13a430613a738e" dependencies = [ "sqlx-core", "sqlx-macros", @@ -1427,9 +1427,9 @@ dependencies = [ [[package]] name = "sqlx-core" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55c4720d7d4cd3d5b00f61d03751c685ad09c33ae8290c8a2c11335e0604300b" +checksum = "f743f2a3cea30a58cd479013f75550e879009e3a02f616f18ca699335aa248c3" dependencies = [ "base64", "bytes", @@ -1461,9 +1461,9 @@ dependencies = [ [[package]] name = "sqlx-macros" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "175147fcb75f353ac7675509bc58abb2cb291caf0fd24a3623b8f7e3eb0a754b" +checksum = "7f4200e0fde19834956d4252347c12a083bdcb237d7a1a1446bffd8768417dce" dependencies = [ "proc-macro2", "quote", @@ -1474,9 +1474,9 @@ dependencies = [ [[package]] name = "sqlx-macros-core" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1cde983058e53bfa75998e1982086c5efe3c370f3250bf0357e344fa3352e32b" +checksum = "882ceaa29cade31beca7129b6beeb05737f44f82dbe2a9806ecea5a7093d00b7" dependencies = [ "dotenvy", "either", @@ -1500,9 +1500,9 @@ dependencies = [ [[package]] name = "sqlx-mysql" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "847d2e5393a4f39e47e4f36cab419709bc2b83cbe4223c60e86e1471655be333" +checksum = "0afdd3aa7a629683c2d750c2df343025545087081ab5942593a5288855b1b7a7" dependencies = [ "atoi", "base64", @@ -1542,9 +1542,9 @@ dependencies = [ [[package]] name = "sqlx-postgres" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc35947a541b9e0a2e3d85da444f1c4137c13040267141b208395a0d0ca4659f" +checksum = "a0bedbe1bbb5e2615ef347a5e9d8cd7680fb63e77d9dafc0f29be15e53f1ebe6" dependencies = [ "atoi", "base64", @@ -1579,9 +1579,9 @@ dependencies = [ [[package]] name = "sqlx-sqlite" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c48291dac4e5ed32da0927a0b981788be65674aeb62666d19873ab4289febde" +checksum = "c26083e9a520e8eb87a06b12347679b142dc2ea29e6e409f805644a7a979a5bc" dependencies = [ "atoi", "flume", diff --git a/Cargo.toml b/Cargo.toml index 6cd7df5..f04b459 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ version = "0.2.0" edition = "2024" [dependencies] -axum = { version = "0.8.3", features = ["macros"] } +axum = { version = "0.8.3", features = ["macros", "tokio"] } dotenvy = "0.15.7" regex = "1.11.1" serde = { version = "1.0.219", features = ["derive"] } diff --git a/README.md b/README.md index e364b0e..259da2f 100755 --- a/README.md +++ b/README.md @@ -1,15 +1,47 @@ # Almond API -**TODO:** Finish this README. +**TODO:** Complete this README. 
+ +Almond is a suite of tools that lets you archive videos into a database. Originally written in Python several years ago with a friend, it now has revamped code and support for many more features. Almond v2 now uses an API instead of being merged as frontend routes in a web app. This allows developers to create custom clients for it. +## Getting started + +You can get a binary build from the [Releases](https://git.roaming97.com/Almond/api/releases) page; just download it and run it either on your server or locally. + +However, if no release build is compatible with your system, you can build one yourself by following the [Building](#building) guide. + +To learn how to use the Almond API, you can read the documentation [here](todo) (TODO: Generate OpenAPI docs). + ## Requirements -You can download binary builds in the [Releases](https://git.roaming97.com/Almond/api/releases) page and just run it in your server or locally. +> [!IMPORTANT] +> To run the Almond API, you **must** have these installed on your system: -However, if you prefer to build a binary for your system, you must have [Rust](https://www.rust-lang.org/tools/install) installed and have the build configured to your needs. **This option is for developers, it is assumed that you know what you're doing.** +- **[yt-dlp](https://github.com/yt-dlp/yt-dlp/)** - for downloading YouTube videos; it does most of the heavy lifting. Install options are listed [in its README](https://github.com/yt-dlp/yt-dlp?tab=readme-ov-file#installation). +- **[yt-dlp-returnyoutubedislike](https://github.com/pukkandan/yt-dlp-returnyoutubedislike)** - for adding ReturnYouTubeDislike information to a video's information JSON instead of making an additional request to the API. +- **[ffmpeg](https://ffmpeg.org/)** - for video processing in some cases. -- [yt-dlp](https://github.com/yt-dlp/yt-dlp/) - for downloading YouTube videos, does most of the heavy lifting.
Install options are listed [there too](https://github.com/yt-dlp/yt-dlp?tab=readme-ov-file#installation). -- [yt-dlp-returnyoutubedislike](https://github.com/pukkandan/yt-dlp-returnyoutubedislike) - for adding ReturnYouTubeDislike information to a video's information JSON instead of adding an additional request to the API. +### Nice to have + +- **An HTTP client like wget, curl, or [HTTPie](https://httpie.io/)** - to send requests to the API without needing an existing front-end; ideal for using the API as a local archiver. +- **[DB Browser for SQLite](https://sqlitebrowser.org/)** - to view the contents of your tables inside the Almond database. + +> [!WARNING] +> The database currently runs on SQLite, but this may change in the future, so these requirements may change as well. + +## Building + +Since this project is written in Rust, you will need [Rust](https://rust-lang.org/) installed on your system. The site has an [installation](https://www.rust-lang.org/tools/install) page that includes instructions for macOS, Linux, Windows, and more. + +If you have Git installed on your system and are comfortable using a terminal, you can clone the repository with the following command: + +```bash +$ git clone https://git.roaming97.com/Almond/api.git +``` + +If not, you can download an archive by clicking the "Code" button at the top of the repository page and then extract the source code from it.
+ +TODO: Finish build guide diff --git a/almond.example.toml b/almond.example.toml index 8c64359..beada27 100755 --- a/almond.example.toml +++ b/almond.example.toml @@ -1,4 +1,8 @@ -host = "127.0.0.1" -port = 3000 -videos_per_page = 10 -comments_per_page = 10 +[address] +host = "0.0.0.0" +port = 8080 + +[pagination] +videos = 10 +comments = 10 +channels = 10 diff --git a/migrations/20250415075127_create-channel-table.sql b/migrations/20250415075127_create-channel-table.sql new file mode 100755 index 0000000..e951175 --- /dev/null +++ b/migrations/20250415075127_create-channel-table.sql @@ -0,0 +1,12 @@ +CREATE TABLE channel ( + "id" INTEGER NOT NULL, + "url" TEXT NOT NULL, + "youtube_id" TEXT NOT NULL, + "name" TEXT NOT NULL, + "handle_url" TEXT NOT NULL, + "avatar_url" TEXT NOT NULL, + "banner_url" TEXT NOT NULL, + "description" TEXT NOT NULL, + "subscribers" INTEGER NOT NULL, + PRIMARY KEY("id") +) \ No newline at end of file diff --git a/src/channel.rs b/src/channel.rs index 263d62c..ad7e471 100755 --- a/src/channel.rs +++ b/src/channel.rs @@ -1,4 +1,146 @@ -use serde::{Deserialize, Serialize}; +use std::{io, path::Path}; -#[derive(Debug, Default, Serialize, Deserialize, Clone)] -pub struct Channel; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use thiserror::Error; +use tokio::fs; +use tracing::{info, warn}; +use url::Url; + +use crate::string::ToUnquotedString; + +#[derive(Debug, Error)] +pub enum ChannelError { + #[error("URL is an invalid YouTube channel URL")] + InvalidUrl, + #[error("Channel already exists in database")] + AlreadyExists, + #[error("IO Error: {0}")] + IOError(#[from] io::Error), + #[error("Could not serialize info JSON: {0}")] + SerializeInfoJSON(#[from] serde_json::Error), + #[error("Failed to parse value from key '{0}'")] + JsonKey(String), +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Channel { + pub id: i64, + pub url: String, + pub youtube_id: String, + pub name: String, + pub handle_url: String, + 
pub avatar_url: String, + pub banner_url: String, + pub description: String, + pub subscribers: i64, +} + +impl Channel { + async fn yt_dlp_task(url: &str) -> Result<(), ChannelError> { + let mut child = tokio::process::Command::new("yt-dlp") + .args([ + "--write-info-json", + "--skip-download", + "-v", + "-o", + "channels/tmp/tmp.%(ext)s", + url, + ]) + .spawn()?; + info!("yt-dlp task invoked"); + child.wait().await?; + info!("yt-dlp task completed successfully"); + + Ok(()) + } + + pub async fn from_url(url: &Url, id: i64) -> Result { + let url_path = url.path(); + + if !url_path.starts_with("/@") + && !url_path.starts_with("/c/") + && !url_path.starts_with("/channel/") + { + return Err(ChannelError::InvalidUrl); + } + + // This task is light enough so it can run on its entirety + Self::yt_dlp_task(url.as_str()).await?; + let info: Value = + serde_json::from_str(&fs::read_to_string("channels/tmp/tmp.info.json").await?)?; + + let get_info_value = |key: &str| { + info.get(key) + .ok_or_else(|| ChannelError::JsonKey(key.to_string())) + .unwrap() + }; + + let youtube_id = get_info_value("channel_id").to_unquoted_string(); + let dir = format!("channels/{youtube_id}"); + let file_stem = format!("{dir}/{youtube_id}"); + + if Path::new(&dir).exists() { + warn!("Channel already exists, skipping"); + return Err(ChannelError::AlreadyExists); + } + + fs::create_dir(dir).await?; + + let url = get_info_value("channel_url").to_unquoted_string(); + let name = get_info_value("channel").to_unquoted_string(); + let handle_url = get_info_value("uploader_url").to_unquoted_string(); + + let mut avatar_url = String::new(); + let mut banner_url = String::new(); + + if let Some(thumbnails) = info.get("thumbnails").and_then(Value::as_array) { + for thumbnail in thumbnails { + if let Some(id) = thumbnail.get("id").and_then(Value::as_str) { + match id { + "avatar_uncropped" => { + avatar_url = thumbnail + .get("url") + .and_then(Value::as_str) + .unwrap_or_default() + .to_string(); + } + 
"banner_uncropped" => { + banner_url = thumbnail + .get("url") + .and_then(Value::as_str) + .unwrap_or_default() + .to_string(); + } + _ => {} + } + } + } + } else { + warn!("Channel {youtube_id} has no thumbnails!"); + } + + let description = get_info_value("description").to_unquoted_string(); + let subscribers = get_info_value("channel_follower_count") + .as_i64() + .unwrap_or_default(); + + fs::rename( + "channels/tmp/tmp.info.json", + format!("{file_stem}.info.json"), + ) + .await?; + + Ok(Self { + id, + url, + youtube_id, + name, + handle_url, + avatar_url, + banner_url, + description, + subscribers, + }) + } +} diff --git a/src/comment.rs b/src/comment.rs index aec1e27..ae2231a 100755 --- a/src/comment.rs +++ b/src/comment.rs @@ -6,6 +6,8 @@ use thiserror::Error; use tokio::fs; use tracing::{error, warn}; +use crate::string::ToUnquotedString; + #[derive(Debug, Error)] pub enum CommentsError { #[error("Target video {0} not found in database")] @@ -56,15 +58,15 @@ pub async fn get_comments_from_video(id: &str) -> Result, CommentsE Ok(comments .iter() .map(|c| Comment { - id: c["id"].to_string(), + id: c["id"].to_unquoted_string(), video_id: id.into(), - parent: c["parent"].to_string(), - text: c["text"].to_string(), + parent: c["parent"].to_unquoted_string(), + text: c["text"].to_unquoted_string(), like_count: c["like_count"].as_i64().unwrap_or_default(), - author_id: c["author_id"].to_string(), - author: c["author"].to_string(), - author_thumbnail: c["author_thumbnail"].to_string(), - author_url: c["author_url"].to_string(), + author_id: c["author_id"].to_unquoted_string(), + author: c["author"].to_unquoted_string(), + author_thumbnail: c["author_thumbnail"].to_unquoted_string(), + author_url: c["author_url"].to_unquoted_string(), timestamp: c["timestamp"].as_i64().unwrap_or_default(), author_is_uploader: c["author_is_uploader"].as_bool().unwrap_or_default(), author_is_verified: c["author_is_verified"].as_bool().unwrap_or_default(), diff --git a/src/config.rs 
b/src/config.rs new file mode 100755 index 0000000..9282c6b --- /dev/null +++ b/src/config.rs @@ -0,0 +1,45 @@ +use serde::Deserialize; + +#[derive(Debug, Clone, Deserialize)] +pub struct Address { + pub host: String, + pub port: u16, +} + +impl Default for Address { + fn default() -> Self { + Self { + host: "0.0.0.0".into(), + port: 8000, + } + } +} + +impl Address { + pub fn get_url(&self) -> String { + format!("{}:{}", self.host, self.port) + } +} + +#[derive(Debug, Clone, Deserialize)] +pub struct Pagination { + pub videos: usize, + pub comments: usize, + pub channels: usize, +} + +impl Default for Pagination { + fn default() -> Self { + Self { + videos: 10, + comments: 10, + channels: 10, + } + } +} + +#[derive(Debug, Default, Clone, Deserialize)] +pub struct Config { + pub address: Address, + pub pagination: Pagination, +} diff --git a/src/instance.rs b/src/instance.rs index e6f497b..cc171a7 100755 --- a/src/instance.rs +++ b/src/instance.rs @@ -1,28 +1,10 @@ use std::fs; -use serde::{Deserialize, Serialize}; use sqlx::SqlitePool; use thiserror::Error; use tracing::{error, warn}; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Config { - pub host: String, - pub port: u16, - pub videos_per_page: usize, - pub comments_per_page: usize, -} - -impl Default for Config { - fn default() -> Self { - Self { - host: "0.0.0.0".into(), - port: 3000, - videos_per_page: 10, - comments_per_page: 10, - } - } -} +use crate::config::Config; /// This controls an instance and its state. 
#[derive(Debug, Clone)] diff --git a/src/main.rs b/src/main.rs index bd6b2b5..caded08 100755 --- a/src/main.rs +++ b/src/main.rs @@ -1,20 +1,27 @@ +use std::net::SocketAddr; + use axum::{ Router, routing::{get, post}, }; use instance::Instance; -use middleware::auth; use routes::{ - comment::video_comments, + channel::{get_channel, list_channels, upload_channel}, + comment::get_comments, + index, + middleware::auth, video::{get_video, list_videos, upload_video}, }; use tokio::signal; use tracing::info; +mod channel; mod comment; +mod config; mod instance; -mod middleware; mod routes; +mod string; +mod url; mod video; #[tokio::main] @@ -25,25 +32,36 @@ async fn main() -> Result<(), Box> { let instance = Instance::new().await?; info!( - "Instance configuration:\n+ Host: {}\n+ Port: {}\n+ Videos per page: {}", - instance.config.host, instance.config.port, instance.config.videos_per_page + "Initialized instance successfully!\nConfiguration:\n- Address\n + Host: {}\n + Port: {}\n- Pagination\n + Videos per page: {}\n + Comments per page: {}\n + Channels per page: {}", + instance.config.address.host, + instance.config.address.port, + instance.config.pagination.videos, + instance.config.pagination.comments, + instance.config.pagination.channels, ); - let address = format!("{}:{}", instance.config.host, instance.config.port); + let address = instance.config.address.get_url(); let almond = Router::new() - .route("/upload", post(upload_video)) + .route("/upload_video", post(upload_video)) + .route("/upload_channel", post(upload_channel)) .route_layer(axum::middleware::from_fn_with_state(instance.clone(), auth)) - .route("/", get(list_videos)) + .route("/", get(index)) + .route("/videos", get(list_videos)) .route("/video/{id}", get(get_video)) - .route("/comments/{id}", get(video_comments)) + .route("/comments/{id}", get(get_comments)) + .route("/channels", get(list_channels)) + .route("/channel/{id}", get(get_channel)) .with_state(instance); let listener = 
tokio::net::TcpListener::bind(address).await?; - axum::serve(listener, almond.into_make_service()) - .with_graceful_shutdown(shutdown_signal()) - .await?; + axum::serve( + listener, + almond.into_make_service_with_connect_info::(), + ) + .with_graceful_shutdown(shutdown_signal()) + .await?; Ok(()) } diff --git a/src/routes/channel.rs b/src/routes/channel.rs new file mode 100755 index 0000000..7de9921 --- /dev/null +++ b/src/routes/channel.rs @@ -0,0 +1,150 @@ +use axum::{ + Json, + extract::{Path, Query, State}, + http::StatusCode, +}; +use serde::{Deserialize, Serialize}; +use tracing::{error, info}; +use url::Url; + +use crate::{ + channel::{Channel, ChannelError}, + instance::Instance, + url::is_youtube_url, +}; + +#[derive(Debug, Deserialize)] +pub struct ListChannelsQuery { + page: Option, +} + +#[derive(Debug, Serialize)] +pub struct ListChannelsResponse { + channels: Vec, + page: usize, + per_page: usize, + total: usize, + pages: usize, +} + +/// Retrieve video list as JSON (paged) +pub async fn list_channels( + State(state): State, + Query(query): Query, +) -> Result, StatusCode> { + let Ok(channels) = sqlx::query_as!(Channel, "SELECT * FROM channel") + .fetch_all(&state.pool) + .await + else { + error!("Could not fetch channels from database!"); + return Err(StatusCode::INTERNAL_SERVER_ERROR); + }; + + let per_page = state.config.pagination.channels; + let total = channels.len(); + let pages = total.div_ceil(per_page); + let page = query.page.unwrap_or(1).min(pages).max(1); + + let start = per_page * (page - 1); + let end = (start + per_page).min(total); + + let channels = if start < total { + channels[start..end].to_vec() + } else { + vec![] + }; + + Ok(Json(ListChannelsResponse { + channels, + page, + per_page, + total, + pages, + })) +} + +/// Get a single channel from the database by its ID +pub async fn get_channel( + State(state): State, + Path(id): Path, +) -> Result, StatusCode> { + sqlx::query_as!(Channel, "SELECT * FROM channel WHERE youtube_id = 
?", id) + .fetch_optional(&state.pool) + .await + .map_or(Err(StatusCode::INTERNAL_SERVER_ERROR), |channel| { + channel.map_or(Err(StatusCode::NOT_FOUND), |c| Ok(Json(c))) + }) +} + +#[derive(Debug, Deserialize)] +pub struct UploadChannelQuery { + url: String, +} + +/// Upload a channel's metadata to the database +pub async fn upload_channel( + State(state): State, + Query(query): Query, +) -> StatusCode { + let id = match sqlx::query_scalar!("SELECT MAX(id) FROM channel") + .fetch_one(&state.pool) + .await + { + Ok(Some(max_id)) => max_id + 1, + Ok(None) => 0, + Err(_) => { + return StatusCode::INTERNAL_SERVER_ERROR; + } + }; + + let Ok(url) = Url::parse(&query.url) else { + error!("Could not parse URL!"); + return StatusCode::BAD_REQUEST; + }; + + if !is_youtube_url(&url) { + error!("YouTube URL RegEx match failed!"); + return StatusCode::BAD_REQUEST; + } + + let new_channel = Channel::from_url(&url, id).await.map_err(|e| match e { + ChannelError::AlreadyExists => StatusCode::OK, + _ => StatusCode::INTERNAL_SERVER_ERROR, + }); + + match new_channel { + Ok(channel) => { + match sqlx::query!( + " + INSERT INTO channel ( + id, url, youtube_id, name, handle_url, avatar_url, + banner_url, description, subscribers + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ", + channel.id, + channel.url, + channel.youtube_id, + channel.name, + channel.handle_url, + channel.avatar_url, + channel.banner_url, + channel.description, + channel.subscribers, + ) + .execute(&state.pool) + .await + { + Ok(result) => { + info!("Inserted channel to database successfully! 
{result:?}"); + StatusCode::OK + } + Err(e) => { + error!("Error inserting channel to database: {e:?}"); + StatusCode::INTERNAL_SERVER_ERROR + } + } + } + Err(status) => status, + } +} diff --git a/src/routes/comment.rs b/src/routes/comment.rs index ff12145..0ecfa0e 100755 --- a/src/routes/comment.rs +++ b/src/routes/comment.rs @@ -26,7 +26,7 @@ pub struct VideoCommentsResponse { } /// Fetches the comments from a video, will return an empty vec if the video has no comments -pub async fn video_comments( +pub async fn get_comments( State(state): State, Path(id): Path, Query(query): Query, @@ -41,7 +41,7 @@ pub async fn video_comments( match comments { Ok(comments) => { - let per_page = state.config.comments_per_page; + let per_page = state.config.pagination.comments; let total = comments.len(); let pages = total.div_ceil(per_page); let page = query.page.unwrap_or(1).max(1).min(pages); diff --git a/src/middleware.rs b/src/routes/middleware.rs similarity index 73% rename from src/middleware.rs rename to src/routes/middleware.rs index 197967c..63303f0 100755 --- a/src/middleware.rs +++ b/src/routes/middleware.rs @@ -1,11 +1,14 @@ +use std::net::SocketAddr; + use axum::{ - extract::{Request, State}, + extract::{ConnectInfo, Request, State}, http::StatusCode, middleware::Next, response::Response, }; use serde::Deserialize; use sha3::{Digest, Sha3_256}; +use tracing::error; use crate::instance::Instance; @@ -36,6 +39,7 @@ impl Key { } } pub async fn auth( + ConnectInfo(addr): ConnectInfo, State(state): State, req: Request, next: Next, @@ -45,13 +49,16 @@ pub async fn auth( .get("almond-api-key") .and_then(|h| h.to_str().ok()) else { - tracing::error!("Could not find almond-api-key header"); + error!("Could not find almond-api-key header (from {addr:?})"); return Err(StatusCode::UNAUTHORIZED); }; let key = Key(auth_header.into()); - match key.validate(&state) { - Ok(()) => Ok(next.run(req).await), - Err(_) => Err(StatusCode::UNAUTHORIZED), + + if 
matches!(key.validate(&state), Ok(())) { + Ok(next.run(req).await) + } else { + error!("Bad password for request (from {addr:?})"); + Err(StatusCode::UNAUTHORIZED) } } diff --git a/src/routes/mod.rs b/src/routes/mod.rs index f97533d..509cd2c 100755 --- a/src/routes/mod.rs +++ b/src/routes/mod.rs @@ -1,2 +1,21 @@ +use axum::Json; +use serde::Serialize; + +pub mod channel; pub mod comment; +pub mod middleware; pub mod video; + +#[derive(Debug, Serialize)] +pub struct IndexResponse { + app_name: String, + version: String, +} + +/// Get general information from the current Almond instance +pub async fn index() -> Json { + let app_name = "Almond".into(); + let version = env!("CARGO_PKG_VERSION").into(); + + Json(IndexResponse { app_name, version }) +} diff --git a/src/routes/video.rs b/src/routes/video.rs index 02858d5..9efc571 100755 --- a/src/routes/video.rs +++ b/src/routes/video.rs @@ -5,9 +5,11 @@ use axum::{ }; use serde::{Deserialize, Serialize}; use tracing::{error, info}; +use url::Url; use crate::{ instance::Instance, + url::is_youtube_url, video::{Video, VideoError}, }; @@ -38,10 +40,10 @@ pub async fn list_videos( return Err(StatusCode::INTERNAL_SERVER_ERROR); }; - let per_page = state.config.videos_per_page; + let per_page = state.config.pagination.videos; let total = videos.len(); let pages = total.div_ceil(per_page); - let page = query.page.unwrap_or(1).max(1).min(pages); + let page = query.page.unwrap_or(1).min(pages).max(1); let start = per_page * (page - 1); let end = (start + per_page).min(total); @@ -95,11 +97,23 @@ pub async fn upload_video( } }; - let new_video = Video::from_url(&query.url, id).await.map_err(|e| match e { - VideoError::InvalidUrl | VideoError::UrlParse(_) => StatusCode::BAD_REQUEST, - VideoError::AlreadyExists => StatusCode::OK, - _ => StatusCode::INTERNAL_SERVER_ERROR, - }); + let Ok(url) = Url::parse(&query.url) else { + error!("Could not parse URL!"); + return StatusCode::BAD_REQUEST; + }; + + if !is_youtube_url(&url) { + 
error!("YouTube URL RegEx match failed!"); + return StatusCode::BAD_REQUEST; + } + + let new_video = Video::from_url(&url, id) + .await + .map_err(|e| match e { + VideoError::InvalidUrl => StatusCode::BAD_REQUEST, + VideoError::AlreadyExists => StatusCode::OK, + _ => StatusCode::INTERNAL_SERVER_ERROR, + }); match new_video { Ok(video) => { diff --git a/src/string.rs b/src/string.rs new file mode 100755 index 0000000..70abfff --- /dev/null +++ b/src/string.rs @@ -0,0 +1,14 @@ +pub trait ToUnquotedString { + fn to_unquoted_string(&self) -> String; +} + +impl ToUnquotedString for serde_json::Value { + fn to_unquoted_string(&self) -> String { + self.to_string() + .strip_prefix('"') + .unwrap_or_default() + .strip_suffix('"') + .unwrap_or_default() + .into() + } +} diff --git a/src/url.rs b/src/url.rs new file mode 100755 index 0000000..c5b4a83 --- /dev/null +++ b/src/url.rs @@ -0,0 +1,7 @@ +use url::Url; + +pub fn is_youtube_url(url: &Url) -> bool { + let pattern = r"^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube(?:-nocookie)?\.com|youtu\.be))(\/(?:@[\w\-]+(?:\/[\w\-]+)?|(?:[\w\-]+\?v=|embed\/|live\/|v\/)?))?([\w\-]+)?(\S+)?$"; + let re = regex::Regex::new(pattern).unwrap(); + re.is_match(url.as_str()) +} diff --git a/src/video.rs b/src/video.rs index dc9eee8..fc0e143 100755 --- a/src/video.rs +++ b/src/video.rs @@ -1,18 +1,17 @@ use std::{io, path::Path}; -use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::Value; use sha3::{Digest, Sha3_256}; use thiserror::Error; use tokio::fs; use tracing::{error, info, warn}; -use url::{ParseError, Url}; +use url::Url; + +use crate::string::ToUnquotedString; #[derive(Debug, Error)] pub enum VideoError { - #[error("Failed to parse URL: {0}")] - UrlParse(#[from] ParseError), #[error("URL is an invalid YouTube URL")] InvalidUrl, #[error("Video already exists in database")] @@ -27,7 +26,7 @@ pub enum VideoError { MissingVideoFile, } -#[derive(Debug, Default, Serialize, Deserialize, Clone)] +#[derive(Debug, Serialize, 
Deserialize, Clone)] pub struct Video { pub id: i64, pub url: String, @@ -50,7 +49,7 @@ pub struct Video { impl Video { async fn yt_dlp_task(url: &str) -> Result<(), VideoError> { - let mut args = vec![ + let args = vec![ "--write-info-json", "--write-thumbnail", "--write-description", @@ -60,13 +59,11 @@ impl Video { "ReturnYoutubeDislike:when=pre_process", "-v", url, + "-o", + "videos/%(id)s/%(id)s.%(ext)s", + "-f", + "bestvideo[ext=mkv]+bestaudio[ext=m4a]/bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo*+bestaudio/best", ]; - args.append(&mut vec![ - "-o", - "videos/%(id)s/%(id)s.%(ext)s", - "-f", - "bestvideo[ext=mkv]+bestaudio[ext=m4a]/bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo*+bestaudio/best", - ]); let mut child = tokio::process::Command::new("yt-dlp").args(args).spawn()?; info!("yt-dlp task invoked"); child.wait().await?; @@ -75,53 +72,30 @@ impl Video { Ok(()) } - fn is_url_valid(url: &Url) -> bool { - let re = Regex::new(r"^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube(?:-nocookie)?\.com|youtu.be))(\/(?:[\w\-]+\?v=|embed\/|live\/|v\/)?)([\w\-]+)(\S+)?$").unwrap(); - if !re.is_match(url.as_str()) { - error!("YouTube URL RegEx match failed!"); - return false; - } - - true - } - - fn get_video_id(url: &Url) -> Option { + pub async fn from_url(url: &Url, id: i64) -> Result { let mut pairs = url.query_pairs(); let Some(query_v) = pairs.find(|(key, _)| key == "v") else { error!("Could not find 'v' query parameter in URL!"); - return None; + return Err(VideoError::InvalidUrl); }; - Some(query_v.1.to_string()) - } - - pub async fn from_url(url: &str, id: i64) -> Result { - let url = Url::parse(url)?; - info!("Parsed argument as URL"); - - if !Self::is_url_valid(&url) { - error!("URL is an invalid YouTube video!"); - return Err(VideoError::InvalidUrl); - } - let youtube_id = Self::get_video_id(&url).ok_or(VideoError::InvalidUrl)?; - + let youtube_id = query_v.1.to_string(); info!("URL is valid YouTube video, got ID '{youtube_id}'"); let dir = 
format!("videos/{youtube_id}"); let file_stem = format!("{dir}/{youtube_id}"); - if !Path::new(&dir).exists() { - fs::create_dir(dir).await?; - } - - let info_json = format!("{file_stem}.info.json"); - let info_json = Path::new(&info_json); // ? Uploading a video doesn't mean updating it, make a PUT route for that later - if info_json.exists() { + if Path::new(&dir).exists() { warn!("Video already exists, skipping"); return Err(VideoError::AlreadyExists); } + fs::create_dir(dir).await?; + + let info_json = format!("{file_stem}.info.json"); + let info_json = Path::new(&info_json); + Self::yt_dlp_task(url.as_str()).await?; let info: Value = serde_json::from_str(&fs::read_to_string(info_json).await?)?; @@ -143,8 +117,8 @@ impl Video { let url = url.to_string(); let description = format!("{file_stem}.description"); - let title = get_info_value("title").to_string(); - let author = get_info_value("uploader").to_string(); + let title = get_info_value("title").to_unquoted_string(); + let author = get_info_value("uploader").to_unquoted_string(); let views = get_info_value("view_count").as_i64().unwrap_or(-1); // Use RYD field from info JSON @@ -175,9 +149,9 @@ impl Video { let sha256 = format!("{:x}", Sha3_256::digest(&buffer)); #[allow(clippy::cast_possible_wrap)] let file_size = buffer.len() as i64; - let author_id = get_info_value("channel_id").to_string(); - let author_url = get_info_value("channel_url").to_string(); - let upload_date = get_info_value("upload_date").to_string(); + let author_id = get_info_value("channel_id").to_unquoted_string(); + let author_url = get_info_value("channel_url").to_unquoted_string(); + let upload_date = get_info_value("upload_date").to_unquoted_string(); let thumbnail = format!("{file_stem}.webp"); @@ -200,8 +174,6 @@ impl Video { thumbnail, }; - info!("Video entry so far: {video:?}"); - Ok(video) } }