diff --git a/Cargo.lock b/Cargo.lock index 08e5f2d..91e6dd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,6 +226,7 @@ dependencies = [ "clap", "epub", "html2md", + "regex", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0f66d43..295ec2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,4 @@ anyhow = "1.0.96" clap = { version = "4.5.30", features = ["derive"] } epub = "2.1.1" html2md = "0.2.15" +regex = "1.11.1" diff --git a/src/lib.rs b/src/lib.rs index 449e184..b378a06 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ use epub::doc::{EpubDoc, NavPoint}; use html2md::parse_html; +use regex::{Captures, Regex}; use std::collections::HashMap; use std::fs; use std::io::{Read, Seek}; @@ -9,28 +10,31 @@ pub fn convert_epub_to_mdbook( epub_path: impl AsRef, output_dir: Option>, ) -> anyhow::Result<()> { - let book_name = epub_path.as_ref().with_extension(""); - let book_name = book_name.file_name().unwrap().to_str().unwrap(); - let pwd = PathBuf::from("."); + let epub_path = epub_path.as_ref(); + if !epub_path.is_file() { + return Err(anyhow::anyhow!("{} is not a file", epub_path.display())); + } + let book_name = epub_path.with_extension(""); + let book_name = book_name.file_name().unwrap().to_string_lossy().to_string(); let output_dir = match output_dir { - Some(output_dir) => output_dir.as_ref().join(book_name), - None => pwd.join(book_name), + Some(output_dir) => output_dir.as_ref().join(&book_name), + None => PathBuf::from(".").join(&book_name), }; fs::create_dir_all(output_dir.join("src"))?; - let mut doc = EpubDoc::new(&epub_path)?; + let mut doc = EpubDoc::new(epub_path)?; let title = if let Some(title) = doc.metadata.get("title") { - title.first().unwrap_or(&book_name.to_string()).clone() + title.first().cloned().unwrap_or(book_name) } else { - book_name.to_string() + book_name }; let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned()); - let (toc, html_files) = toc_to_md(&doc, &title)?; + let (toc, html_to_md) = toc_to_md(&doc, &title)?; fs::write(output_dir.join("src/SUMMARY.md"), toc)?; - extract_chapters_and_resources(&mut doc, &output_dir, &html_files)?; + extract_chapters_and_resources(&mut doc, &output_dir, &html_to_md)?; write_book_toml(&output_dir, &title, creator)?; Ok(()) } @@ -45,7 +49,7 @@ pub fn nav_point_to_md( "{}- [{}]({})\n", " ".repeat(indent), nav.label, - file.to_str()? + file.to_string_lossy() ); for child in &nav.children { if let Some(child_md) = nav_point_to_md(child, indent + 1, html_files) { @@ -62,41 +66,53 @@ pub fn toc_to_md( let toc = doc.toc.clone(); let mut markdown = format!("# {}\n\n", title); - let html_files = doc + let html_to_md = doc .resources .iter() .filter(|(_, (_, mime))| mime == "application/xhtml+xml") .map(|(_, (path, _))| (path.clone(), path.with_extension("md"))) .collect::>(); for nav in toc { - if let Some(md) = nav_point_to_md(&nav, 0, &html_files) { + if let Some(md) = nav_point_to_md(&nav, 0, &html_to_md) { markdown.push_str(&md); } } - Ok((markdown, html_files)) + Ok((markdown, html_to_md)) } pub fn extract_chapters_and_resources( doc: &mut EpubDoc, output_dir: impl AsRef, - html_files: &HashMap, + html_to_md: &HashMap, ) -> anyhow::Result<()> { let output_dir = output_dir.as_ref(); let src_dir = output_dir.join("src"); - + let re = Regex::new(r#"\[[^\]]+\]\(([^)]+)\)"#).unwrap(); // [abc](abc.html) for (_, (path, _)) in doc.resources.clone().into_iter() { let content = match doc.get_resource_by_path(&path) { Some(content) => content, None => continue, }; - if let Some(path) = html_files.get(&path) { + if let Some(path) = html_to_md.get(&path) { let target_path = src_dir.join(path); if let Some(parent) = target_path.parent() { fs::create_dir_all(parent)?; } let html = String::from_utf8(content)?; let markdown = parse_html(&html); + let markdown = re + .replace_all(&markdown, |caps: &Captures| { + let link = caps[1].to_string(); + let ori = caps[0].to_string(); + if let Some(md_path) = html_to_md.get(&PathBuf::from(&link)) { + let md_path = md_path.to_string_lossy().to_string(); + ori.replace(&link, &md_path) + } else { + ori + } + }) + .to_string(); fs::write(target_path, markdown)?; } else { let target_path = src_dir.join(&path); @@ -123,3 +139,18 @@ pub fn write_book_toml( fs::write(output_dir.join("book.toml"), toml_content)?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_replace_links() { + let markdown = r"[hello](hello.html)"; + let re = Regex::new(r#"\[[^\]]+\]\(([^)]+)\)"#).unwrap(); + let markdown = re.replace_all(&markdown, |caps: &Captures| { + let link = caps[1].to_string(); + caps[0].replace(&link, "hello.md") + }); + assert_eq!(markdown, "[hello](hello.md)"); + } +} diff --git a/src/main.rs b/src/main.rs index ce38a4e..ec905d2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,7 @@ use epub2mdbook::convert_epub_to_mdbook; struct Args { /// The path to the input EPUB file #[clap(short, long)] - input_epub_path: PathBuf, + input_epub: PathBuf, /// The path to the output directory #[clap(short, long)] output_dir: Option, @@ -15,7 +15,7 @@ struct Args { fn main() -> anyhow::Result<()> { let args = Args::parse(); - convert_epub_to_mdbook(args.input_epub_path, args.output_dir)?; + convert_epub_to_mdbook(args.input_epub, args.output_dir)?; println!("Conversion completed successfully!"); Ok(()) }