From 632fe272fa9a8af079efccb190bc8ddeb3323896 Mon Sep 17 00:00:00 2001 From: realaravinth Date: Fri, 18 Feb 2022 21:03:27 +0530 Subject: [PATCH] feat: read_file processes directories SUMMARY This program supports directories in gists. This patch modifies Data::read_file to support directory reads. Additionally, CreateGistRequest is modified to accept files in subdirectories. DESCRIPTION Data::read_file When the repository contains subdirectories, it will recursively read all files and return their contents. crate::data::api::v1::gists::GitFileMode Set of known (to me) file modes that Git uses. GitFileMode::Unsupported is used to take advantage of num_enum::FromPrimitive. The alternative would have been num_enum::TryFromPrimitive, which returns errors on unsupported values. I felt the former was cleaner. --- Cargo.lock | 32 +++++++ Cargo.toml | 1 + src/api/v1/gists.rs | 24 +++-- src/data/api/v1/gists.rs | 200 +++++++++++++++++++++++++++++++-------- 4 files changed, 207 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14f7f02..0572067 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1054,6 +1054,7 @@ dependencies = [ "lazy_static", "log", "num_cpus", + "num_enum", "pretty_env_logger", "rand 0.8.4", "serde 1.0.136", @@ -1597,6 +1598,27 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "720d3ea1055e4e4574c0c0b0f8c3fd4f24c4cdaf465948206dea090b57b526ad" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d992b768490d7fe0d8586d9b5745f6c49f557da6d81dc982b1d167ad4edbb21" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "num_threads" version = "0.1.3" @@ -1793,6 +1815,16 @@ dependencies = [ "log", ] +[[package]] +name = "proc-macro-crate" +version = "1.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dada8c9981fcf32929c3c0f0cd796a9284aca335565227ed88c83babb1d43dc" +dependencies = [ + "thiserror", + "toml", +] + [[package]] name = "proc-macro-error" version = "1.0.4" diff --git a/Cargo.toml b/Cargo.toml index fd0d3b8..d221424 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ tokio = { version = "1.16.1", features = ["fs"] } url = "2.2" urlencoding = "2.1.0" validator = { version = "0.14.0", features = ["derive"] } +num_enum = "0.5.6" [dev-dependencies] actix-rt = "2" diff --git a/src/api/v1/gists.rs b/src/api/v1/gists.rs index c5204bd..da01e15 100644 --- a/src/api/v1/gists.rs +++ b/src/api/v1/gists.rs @@ -20,16 +20,22 @@ use actix_web::*; use db_core::prelude::*; use serde::{Deserialize, Serialize}; -use crate::data::api::v1::gists::{CreateGist, File, GistID}; +use crate::data::api::v1::gists::{CreateGist, FileInfo, GistID}; use crate::errors::*; use crate::*; +//#[derive(Serialize, Deserialize, Debug, Clone)] +//pub struct File { +// pub filename: String, +// pub content: ContentType, +//} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CreateGistRequest { #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, pub visibility: GistVisibility, - pub files: Vec, + pub files: Vec, } impl CreateGistRequest { @@ -74,7 +80,7 @@ async fn new( #[cfg(test)] mod tests { use super::*; - use crate::data::api::v1::gists::ContentType; + use crate::data::api::v1::gists::{ContentType, FileType}; use crate::tests::*; #[actix_rt::test] @@ -96,17 +102,17 @@ mod tests { let app = get_app!(data, db).await; let files = [ - File { + FileInfo { filename: "foo".into(), - content: ContentType::Text("foobar".into()), + content: FileType::File(ContentType::Text("foobar".into())), }, - File { + FileInfo { filename: "bar".into(), - content: ContentType::Text("foobar".into()), + content: FileType::File(ContentType::Text("foobar".into())), }, - File { + FileInfo { filename: "foo 
bar".into(), - content: ContentType::Text("foobar".into()), + content: FileType::File(ContentType::Text("foobar".into())), }, ]; diff --git a/src/data/api/v1/gists.rs b/src/data/api/v1/gists.rs index eb4e48f..4c8ec57 100644 --- a/src/data/api/v1/gists.rs +++ b/src/data/api/v1/gists.rs @@ -18,6 +18,7 @@ use std::path::{Path, PathBuf}; use db_core::prelude::*; use git2::*; +use num_enum::FromPrimitive; use serde::{Deserialize, Serialize}; use tokio::fs; @@ -25,6 +26,53 @@ use crate::errors::*; use crate::utils::*; use crate::*; +/// A GitFileMode represents the kind of tree entries used by git. It +/// resembles regular file system modes, although GitFileModes are +/// considerably simpler (there are not so many), and there are some, +/// like Submodule, that have no file system equivalent. +// Adapted from https://github.com/go-git/go-git/blob/master/plumbing/filemode/filemode.go (Apache-2.0 License) +#[derive(Debug, PartialEq, Clone, FromPrimitive)] +#[repr(isize)] +pub enum GitFileMode { + /// Empty is used as the GitFileMode of tree elements when comparing + /// trees in the following situations: + /// + /// - the mode of tree elements before their creation. + /// - the mode of tree elements after their deletion. + /// - the mode of unmerged elements when checking the index. + /// + /// Empty has no file system equivalent. Empty is the zero value + /// of [GitFileMode]. + Empty = 0, + /// Regular represents non-executable files. + Regular = 0o100644, + /// Dir represents a directory. + Dir = 0o40000, + /// Deprecated represents non-executable files with the group writable bit set. This mode was + /// supported by the first versions of git, but it has since been deprecated. The go-git + /// library (github.com/go-git/go-git; not realaravinth/gists at the moment) uses it + /// internally so that old packfiles can be read, but treats such files as Regular when interfacing + /// with the outside world. This is the standard git behaviour. 
+ Deprecated = 0o100664, + /// Executable represents executable files. + Executable = 0o100755, + /// Symlink represents symbolic links to files. + Symlink = 0o120000, + /// Submodule represents git submodules. This mode has no file system + /// equivalent. + Submodule = 0o160000, + + /// Unsupported file mode + #[num_enum(default)] + Unsupported = -1, +} + +impl From<&'_ TreeEntry<'_>> for GitFileMode { + fn from(t: &TreeEntry) -> Self { + GitFileMode::from(t.filemode() as isize) + } +} + pub struct Gist { pub id: String, pub repository: git2::Repository, @@ -36,17 +84,26 @@ pub struct CreateGist<'a> { pub visibility: &'a GistVisibility, } -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct File { - pub filename: String, - pub content: ContentType, -} - pub enum GistID<'a> { Repository(&'a mut git2::Repository), ID(&'a str), } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct FileInfo { + pub filename: String, + pub content: FileType, +} + +pub struct GistInfo { + pub files: Vec, + pub description: String, + pub owner: String, + pub created: i64, + pub updated: i64, + pub visibility: GistVisibility, +} + #[derive(Serialize, PartialEq, Clone, Debug, Deserialize)] #[serde(rename_all = "lowercase")] pub enum ContentType { @@ -71,12 +128,12 @@ impl ContentType { } } -#[derive(Serialize, Deserialize, Debug, Clone)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] #[serde(rename_all = "lowercase")] pub enum FileType { /// Contains file content File(ContentType), - Dir, + Dir(Vec), } impl Data { @@ -126,7 +183,7 @@ impl Data { &self, _db: &T, gist_id: GistID<'_>, - files: &[File], + files: &[FileInfo], ) -> ServiceResult<()> { // TODO change updated in DB let inner = |repo: &mut Repository| -> ServiceResult<()> { @@ -136,12 +193,15 @@ impl Data { for file in files.iter() { let escaped_filename = escape_spaces(&file.filename); - let obj = odb - .write(ObjectType::Blob, file.content.as_bytes()) - .unwrap(); - tree_builder - 
.insert(&escaped_filename, obj, 0o100644) - .unwrap(); + match &file.content { + FileType::Dir(dir_contents) => unimplemented!(), + FileType::File(f) => { + let obj = odb.write(ObjectType::Blob, f.as_bytes()).unwrap(); + tree_builder + .insert(&escaped_filename, obj, 0o100644) + .unwrap(); + } + } } let tree_hash = tree_builder.write().unwrap(); @@ -178,14 +238,10 @@ impl Data { match gist_id { GistID::ID(path) => { let mut repo = git2::Repository::open(self.get_repository_path(path)).unwrap(); - inner(&mut repo)?; + inner(&mut repo) } - GistID::Repository(repository) => { - inner(repository)?; - } - }; - - Ok(()) + GistID::Repository(repository) => inner(repository), + } } /// Please note that this method expects path to not contain any spaces @@ -197,15 +253,73 @@ impl Data { pub async fn read_file( &self, _db: &T, - gist_id: &str, + gist_id: GistID<'_>, path: &str, - ) -> ServiceResult { - let repo = git2::Repository::open(self.get_repository_path(gist_id)).unwrap(); - let head = repo.head().unwrap(); - let tree = head.peel_to_tree().unwrap(); - let entry = tree.get_path(Path::new(path)).unwrap(); - let blob = repo.find_blob(entry.id()).unwrap(); - Ok(ContentType::from_blob(&blob)) + ) -> ServiceResult { + let inner = |repo: &git2::Repository| -> ServiceResult { + let head = repo.head().unwrap(); + let tree = head.peel_to_tree().unwrap(); + let entry = tree.get_path(Path::new(path)).unwrap(); + GitFileMode::Regular as i32; + + fn read_file(id: Oid, repo: &git2::Repository) -> FileType { + let blob = repo.find_blob(id).unwrap(); + FileType::File(ContentType::from_blob(&blob)) + } + + fn read_dir(id: Oid, repo: &Repository) -> FileType { + let tree = repo.find_tree(id).unwrap(); + let mut items = Vec::with_capacity(tree.len()); + for item in tree.iter() { + println!("{:?}", &item.name()); + if let Some(name) = item.name() { + let mode: GitFileMode = (&item).into(); + let file = match mode { + GitFileMode::Dir => read_dir(item.id(), repo), + GitFileMode::Submodule 
=> unimplemented!(), + GitFileMode::Empty => unimplemented!(), + GitFileMode::Deprecated => unimplemented!(), + GitFileMode::Unsupported => unimplemented!(), + GitFileMode::Symlink => unimplemented!(), + GitFileMode::Executable => read_file(item.id(), repo), + GitFileMode::Regular => read_file(item.id(), repo), + }; + items.push(FileInfo { + filename: name.to_owned(), + content: file, + }); + } + } + FileType::Dir(items) + } + let mode: GitFileMode = (&entry).into(); + if let Some(name) = entry.name() { + let file = match mode { + GitFileMode::Dir => read_dir(entry.id(), repo), + GitFileMode::Submodule => unimplemented!(), + GitFileMode::Empty => unimplemented!(), + GitFileMode::Deprecated => unimplemented!(), + GitFileMode::Unsupported => unimplemented!(), + GitFileMode::Symlink => unimplemented!(), + GitFileMode::Executable => read_file(entry.id(), repo), + GitFileMode::Regular => read_file(entry.id(), repo), + }; + Ok(FileInfo { + filename: name.to_string(), + content: file, + }) + } else { + unimplemented!(); + } + }; + + match gist_id { + GistID::ID(path) => { + let repo = git2::Repository::open(self.get_repository_path(path)).unwrap(); + inner(&repo) + } + GistID::Repository(repository) => inner(repository), + } } } @@ -233,14 +347,18 @@ pub mod tests { &self, db: &T, gist_id: &str, - files: &[File], + files: &[FileInfo], ) { for file in files.iter() { let content = self - .read_file(db, &gist_id, &escape_spaces(&file.filename)) + .read_file(db, GistID::ID(&gist_id), &escape_spaces(&file.filename)) .await .unwrap(); - assert_eq!(content, file.content); + let req_escaped_file = FileInfo { + filename: escape_spaces(&file.filename), + content: file.content.clone(), + }; + assert_eq!(&content, &req_escaped_file); } } } @@ -272,17 +390,17 @@ pub mod tests { // save files let files = [ - File { + FileInfo { filename: "foo".into(), - content: ContentType::Text("foobar".into()), + content: FileType::File(ContentType::Text("foobar".into())), }, - File { + FileInfo { 
filename: "bar".into(), - content: ContentType::Text("foobar".into()), + content: FileType::File(ContentType::Text("foobar".into())), }, - File { + FileInfo { filename: "foo bar".into(), - content: ContentType::Text("foobar".into()), + content: FileType::File(ContentType::Text("foobar".into())), }, ]; @@ -290,9 +408,9 @@ pub mod tests { .await .unwrap(); data.gist_files_written_helper(db, &gist.id, &files).await; - let files2 = [File { + let files2 = [FileInfo { filename: "notfirstcommit".into(), - content: ContentType::Text("foobar".into()), + content: FileType::File(ContentType::Text("foobar".into())), }]; data.write_file(db, GistID::ID(&gist.id), &files2)