diff --git a/Cargo.lock b/Cargo.lock index 0d5f8ce..8a1cdcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,7 +192,7 @@ dependencies = [ [[package]] name = "epub2mdbook" -version = "0.8.0" +version = "0.9.0" dependencies = [ "clap", "epub", diff --git a/Cargo.toml b/Cargo.toml index d72e12c..3770e3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "epub2mdbook" -version = "0.8.0" +version = "0.9.0" edition = "2024" description = "A tool to convert EPUB files to MDBook format" authors = ["Maverick Liu "] diff --git a/src/lib.rs b/src/lib.rs index a6cce97..d62d9af 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ use std::{fs, io}; /// # Arguments /// /// * `epub_path` - The path to the EPUB file -/// * `output_dir` - The path to the output directory, pwd by default +/// * `output_dir` - The path to the output directory, working directory by default /// pub fn convert_epub_to_mdbook( epub_path: impl AsRef, @@ -37,21 +37,24 @@ pub fn convert_epub_to_mdbook( }; fs::create_dir_all(output_dir.join("src"))?; - let mut doc = EpubDoc::new(epub_path)?; - let title = doc + let mut epub_doc = EpubDoc::new(epub_path)?; + let title = epub_doc .metadata .get("title") .and_then(|v| v.first().cloned()) .unwrap_or(book_name); - let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned()); - let (toc, html_to_md) = toc_to_md(&doc, &title); - extract_chapters_and_resources(&mut doc, &output_dir, &html_to_md)?; - fs::write(output_dir.join("src/SUMMARY.md"), toc)?; + let creator = epub_doc + .metadata + .get("creator") + .and_then(|v| v.first().cloned()); + let (summary_md, html_to_md) = generate_summary_md(&epub_doc, &title); + extract_chapters_and_resources(&mut epub_doc, &output_dir, &html_to_md)?; + fs::write(output_dir.join("src/SUMMARY.md"), summary_md)?; write_book_toml(&output_dir, &title, creator)?; Ok(()) } -fn nav_to_md( +fn epub_nav_to_md( nav: &NavPoint, indent: usize, html_to_md: &HashMap, @@ -64,14 +67,14 @@ fn nav_to_md( file.to_string_lossy() ); for child in &nav.children { - if let Some(child_md) = nav_to_md(child, indent + 1, html_to_md) { + if let Some(child_md) = epub_nav_to_md(child, indent + 1, html_to_md) { md.push_str(&child_md); } } Some(md) } -/// Convert the table of contents to SUMMARY.md +/// generate SUMMARY.md and the file mapping from html to md /// /// # Arguments /// @@ -82,21 +85,19 @@ fn nav_to_md( /// /// * `summary_md` - The SUMMARY.md content /// * `html_to_md` - The file mapping from html to md -pub fn toc_to_md( - doc: &EpubDoc, +pub fn generate_summary_md( + epub_doc: &EpubDoc, title: &str, ) -> (String, HashMap) { - let toc = doc.toc.clone(); - let mut summary_md = format!("# {}\n\n", title); - let html_to_md = doc + let html_to_md = epub_doc .resources .iter() - .filter(|(_, (_, mime))| mime == "application/xhtml+xml") + .filter(|(_, (_, mime))| ["application/xhtml+xml", "text/html"].contains(&&**mime)) .map(|(_, (path, _))| (path.clone(), path.with_extension("md"))) .collect::>(); - for nav in toc { - if let Some(md) = nav_to_md(&nav, 0, &html_to_md) { + for nav in &epub_doc.toc { + if let Some(md) = epub_nav_to_md(nav, 0, &html_to_md) { summary_md.push_str(&md); } } @@ -104,7 +105,7 @@ pub fn toc_to_md( } fn extract_chapters_and_resources( - doc: &mut EpubDoc, + epub_doc: &mut EpubDoc, output_dir: impl AsRef, html_to_md: &HashMap, ) -> Result<(), Error> { @@ -114,8 +115,8 @@ fn extract_chapters_and_resources( .collect::>(); let output_dir = output_dir.as_ref(); let src_dir = output_dir.join("src"); - for (_, (path, _)) in doc.resources.clone().into_iter() { - let mut content = match doc.get_resource_by_path(&path) { + for (_, (path, _)) in epub_doc.resources.clone().into_iter() { + let mut content = match epub_doc.get_resource_by_path(&path) { Some(content) => content, None => continue, // unreachable }; @@ -148,7 +149,7 @@ fn extract_chapters_and_resources( /// [ABC]({abc.html}) /// ``` static LINK: LazyLock = LazyLock::new(|| { - Regex::new(r#"\[[^\]]+\]\(?P([^#)]+)(?:#[^)]+)?\)"#).expect("unreachable") + Regex::new(r#"\[[^\]]+\]\((?P[^#)]+)(#[^)]+)?\)"#).expect("unreachable") }); /// Match the URL link, eg: /// ``` @@ -204,9 +205,9 @@ mod tests { fn test_replace_links() { let markdown = r"[hello](hello.html#xxx) [hi](hi.xhtml)"; let markdown = LINK.replace_all(&markdown, |caps: &Captures| { - let link = caps[1].to_string(); - caps[0].replace(&link, "hello.md") + let link = &caps["link"]; + caps[0].replace(link, "link.md") }); - assert_eq!(markdown, "[hello](hello.md#xxx) [hi](hello.md)"); + assert_eq!(markdown, "[hello](link.md#xxx) [hi](link.md)"); } } diff --git a/src/main.rs b/src/main.rs index 3f114bf..09acbc9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ struct Args { /// The path to the input EPUB file #[clap(short, long)] input_epub: PathBuf, - /// The path to the output directory, pwd by default + /// The path to the output directory, working directory by default #[clap(short, long)] output_dir: Option, }