mirror of
https://github.com/cyborg42/epub2mdbook.git
synced 2026-02-10 16:24:50 -05:00
fix regex
This commit is contained in:
parent
a84969da20
commit
24a373c45a
4 changed files with 29 additions and 28 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -192,7 +192,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "epub2mdbook"
|
name = "epub2mdbook"
|
||||||
version = "0.8.0"
|
version = "0.9.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"epub",
|
"epub",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "epub2mdbook"
|
name = "epub2mdbook"
|
||||||
version = "0.8.0"
|
version = "0.9.0"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
description = "A tool to convert EPUB files to MDBook format"
|
description = "A tool to convert EPUB files to MDBook format"
|
||||||
authors = ["Maverick Liu <maverick.liu42@gmail.com>"]
|
authors = ["Maverick Liu <maverick.liu42@gmail.com>"]
|
||||||
|
|
|
||||||
51
src/lib.rs
51
src/lib.rs
|
|
@ -15,7 +15,7 @@ use std::{fs, io};
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `epub_path` - The path to the EPUB file
|
/// * `epub_path` - The path to the EPUB file
|
||||||
/// * `output_dir` - The path to the output directory, pwd by default
|
/// * `output_dir` - The path to the output directory, working directory by default
|
||||||
///
|
///
|
||||||
pub fn convert_epub_to_mdbook(
|
pub fn convert_epub_to_mdbook(
|
||||||
epub_path: impl AsRef<Path>,
|
epub_path: impl AsRef<Path>,
|
||||||
|
|
@ -37,21 +37,24 @@ pub fn convert_epub_to_mdbook(
|
||||||
};
|
};
|
||||||
fs::create_dir_all(output_dir.join("src"))?;
|
fs::create_dir_all(output_dir.join("src"))?;
|
||||||
|
|
||||||
let mut doc = EpubDoc::new(epub_path)?;
|
let mut epub_doc = EpubDoc::new(epub_path)?;
|
||||||
let title = doc
|
let title = epub_doc
|
||||||
.metadata
|
.metadata
|
||||||
.get("title")
|
.get("title")
|
||||||
.and_then(|v| v.first().cloned())
|
.and_then(|v| v.first().cloned())
|
||||||
.unwrap_or(book_name);
|
.unwrap_or(book_name);
|
||||||
let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned());
|
let creator = epub_doc
|
||||||
let (toc, html_to_md) = toc_to_md(&doc, &title);
|
.metadata
|
||||||
extract_chapters_and_resources(&mut doc, &output_dir, &html_to_md)?;
|
.get("creator")
|
||||||
fs::write(output_dir.join("src/SUMMARY.md"), toc)?;
|
.and_then(|v| v.first().cloned());
|
||||||
|
let (summary_md, html_to_md) = generate_summary_md(&epub_doc, &title);
|
||||||
|
extract_chapters_and_resources(&mut epub_doc, &output_dir, &html_to_md)?;
|
||||||
|
fs::write(output_dir.join("src/SUMMARY.md"), summary_md)?;
|
||||||
write_book_toml(&output_dir, &title, creator)?;
|
write_book_toml(&output_dir, &title, creator)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn nav_to_md(
|
fn epub_nav_to_md(
|
||||||
nav: &NavPoint,
|
nav: &NavPoint,
|
||||||
indent: usize,
|
indent: usize,
|
||||||
html_to_md: &HashMap<PathBuf, PathBuf>,
|
html_to_md: &HashMap<PathBuf, PathBuf>,
|
||||||
|
|
@ -64,14 +67,14 @@ fn nav_to_md(
|
||||||
file.to_string_lossy()
|
file.to_string_lossy()
|
||||||
);
|
);
|
||||||
for child in &nav.children {
|
for child in &nav.children {
|
||||||
if let Some(child_md) = nav_to_md(child, indent + 1, html_to_md) {
|
if let Some(child_md) = epub_nav_to_md(child, indent + 1, html_to_md) {
|
||||||
md.push_str(&child_md);
|
md.push_str(&child_md);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(md)
|
Some(md)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert the table of contents to SUMMARY.md
|
/// generate SUMMARY.md and the file mapping from html to md
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
|
|
@ -82,21 +85,19 @@ fn nav_to_md(
|
||||||
///
|
///
|
||||||
/// * `summary_md` - The SUMMARY.md content
|
/// * `summary_md` - The SUMMARY.md content
|
||||||
/// * `html_to_md` - The file mapping from html to md
|
/// * `html_to_md` - The file mapping from html to md
|
||||||
pub fn toc_to_md<R: Read + Seek>(
|
pub fn generate_summary_md<R: Read + Seek>(
|
||||||
doc: &EpubDoc<R>,
|
epub_doc: &EpubDoc<R>,
|
||||||
title: &str,
|
title: &str,
|
||||||
) -> (String, HashMap<PathBuf, PathBuf>) {
|
) -> (String, HashMap<PathBuf, PathBuf>) {
|
||||||
let toc = doc.toc.clone();
|
|
||||||
|
|
||||||
let mut summary_md = format!("# {}\n\n", title);
|
let mut summary_md = format!("# {}\n\n", title);
|
||||||
let html_to_md = doc
|
let html_to_md = epub_doc
|
||||||
.resources
|
.resources
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(_, (_, mime))| mime == "application/xhtml+xml")
|
.filter(|(_, (_, mime))| ["application/xhtml+xml", "text/html"].contains(&&**mime))
|
||||||
.map(|(_, (path, _))| (path.clone(), path.with_extension("md")))
|
.map(|(_, (path, _))| (path.clone(), path.with_extension("md")))
|
||||||
.collect::<HashMap<PathBuf, PathBuf>>();
|
.collect::<HashMap<PathBuf, PathBuf>>();
|
||||||
for nav in toc {
|
for nav in &epub_doc.toc {
|
||||||
if let Some(md) = nav_to_md(&nav, 0, &html_to_md) {
|
if let Some(md) = epub_nav_to_md(nav, 0, &html_to_md) {
|
||||||
summary_md.push_str(&md);
|
summary_md.push_str(&md);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -104,7 +105,7 @@ pub fn toc_to_md<R: Read + Seek>(
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_chapters_and_resources<R: Read + Seek>(
|
fn extract_chapters_and_resources<R: Read + Seek>(
|
||||||
doc: &mut EpubDoc<R>,
|
epub_doc: &mut EpubDoc<R>,
|
||||||
output_dir: impl AsRef<Path>,
|
output_dir: impl AsRef<Path>,
|
||||||
html_to_md: &HashMap<PathBuf, PathBuf>,
|
html_to_md: &HashMap<PathBuf, PathBuf>,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
|
|
@ -114,8 +115,8 @@ fn extract_chapters_and_resources<R: Read + Seek>(
|
||||||
.collect::<HashMap<_, _>>();
|
.collect::<HashMap<_, _>>();
|
||||||
let output_dir = output_dir.as_ref();
|
let output_dir = output_dir.as_ref();
|
||||||
let src_dir = output_dir.join("src");
|
let src_dir = output_dir.join("src");
|
||||||
for (_, (path, _)) in doc.resources.clone().into_iter() {
|
for (_, (path, _)) in epub_doc.resources.clone().into_iter() {
|
||||||
let mut content = match doc.get_resource_by_path(&path) {
|
let mut content = match epub_doc.get_resource_by_path(&path) {
|
||||||
Some(content) => content,
|
Some(content) => content,
|
||||||
None => continue, // unreachable
|
None => continue, // unreachable
|
||||||
};
|
};
|
||||||
|
|
@ -148,7 +149,7 @@ fn extract_chapters_and_resources<R: Read + Seek>(
|
||||||
/// [ABC]({abc.html})
|
/// [ABC]({abc.html})
|
||||||
/// ```
|
/// ```
|
||||||
static LINK: LazyLock<Regex> = LazyLock::new(|| {
|
static LINK: LazyLock<Regex> = LazyLock::new(|| {
|
||||||
Regex::new(r#"\[[^\]]+\]\(?P<link>([^#)]+)(?:#[^)]+)?\)"#).expect("unreachable")
|
Regex::new(r#"\[[^\]]+\]\((?P<link>[^#)]+)(#[^)]+)?\)"#).expect("unreachable")
|
||||||
});
|
});
|
||||||
/// Match the URL link, eg:
|
/// Match the URL link, eg:
|
||||||
/// ```
|
/// ```
|
||||||
|
|
@ -204,9 +205,9 @@ mod tests {
|
||||||
fn test_replace_links() {
|
fn test_replace_links() {
|
||||||
let markdown = r"[hello](hello.html#xxx) [hi](hi.xhtml)";
|
let markdown = r"[hello](hello.html#xxx) [hi](hi.xhtml)";
|
||||||
let markdown = LINK.replace_all(&markdown, |caps: &Captures| {
|
let markdown = LINK.replace_all(&markdown, |caps: &Captures| {
|
||||||
let link = caps[1].to_string();
|
let link = &caps["link"];
|
||||||
caps[0].replace(&link, "hello.md")
|
caps[0].replace(link, "link.md")
|
||||||
});
|
});
|
||||||
assert_eq!(markdown, "[hello](hello.md#xxx) [hi](hello.md)");
|
assert_eq!(markdown, "[hello](link.md#xxx) [hi](link.md)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ struct Args {
|
||||||
/// The path to the input EPUB file
|
/// The path to the input EPUB file
|
||||||
#[clap(short, long)]
|
#[clap(short, long)]
|
||||||
input_epub: PathBuf,
|
input_epub: PathBuf,
|
||||||
/// The path to the output directory, pwd by default
|
/// The path to the output directory, working directory by default
|
||||||
#[clap(short, long)]
|
#[clap(short, long)]
|
||||||
output_dir: Option<PathBuf>,
|
output_dir: Option<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue