mirror of
https://github.com/cyborg42/epub2mdbook.git
synced 2026-02-10 16:24:50 -05:00
dev
This commit is contained in:
parent
6bcc1d9682
commit
5d10769637
6 changed files with 115 additions and 49 deletions
36
Cargo.lock
generated
36
Cargo.lock
generated
|
|
@ -67,12 +67,6 @@ dependencies = [
|
||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "anyhow"
|
|
||||||
version = "1.0.96"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arbitrary"
|
name = "arbitrary"
|
||||||
version = "1.4.1"
|
version = "1.4.1"
|
||||||
|
|
@ -213,7 +207,7 @@ checksum = "f7425903972972788e21d4a8cbec3589c232cca4fab21d320187a8e452f6465e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"regex",
|
"regex",
|
||||||
"thiserror",
|
"thiserror 1.0.69",
|
||||||
"xml-rs",
|
"xml-rs",
|
||||||
"zip",
|
"zip",
|
||||||
]
|
]
|
||||||
|
|
@ -222,11 +216,11 @@ dependencies = [
|
||||||
name = "epub2mdbook"
|
name = "epub2mdbook"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
|
||||||
"clap",
|
"clap",
|
||||||
"epub",
|
"epub",
|
||||||
"html2md",
|
"html2md",
|
||||||
"regex",
|
"regex",
|
||||||
|
"thiserror 2.0.11",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -321,7 +315,7 @@ dependencies = [
|
||||||
"combine",
|
"combine",
|
||||||
"jni-sys",
|
"jni-sys",
|
||||||
"log",
|
"log",
|
||||||
"thiserror",
|
"thiserror 1.0.69",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -698,7 +692,16 @@ version = "1.0.69"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"thiserror-impl",
|
"thiserror-impl 1.0.69",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror"
|
||||||
|
version = "2.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
|
||||||
|
dependencies = [
|
||||||
|
"thiserror-impl 2.0.11",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -712,6 +715,17 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror-impl"
|
||||||
|
version = "2.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "toml_datetime"
|
name = "toml_datetime"
|
||||||
version = "0.6.8"
|
version = "0.6.8"
|
||||||
|
|
@ -878,5 +892,5 @@ dependencies = [
|
||||||
"flate2",
|
"flate2",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"num_enum",
|
"num_enum",
|
||||||
"thiserror",
|
"thiserror 1.0.69",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,16 @@
|
||||||
name = "epub2mdbook"
|
name = "epub2mdbook"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
description = "A tool to convert EPUB files to MDBook format"
|
||||||
|
authors = ["Maverick Liu <maverickliu@gmail.com>"]
|
||||||
|
license = "MIT"
|
||||||
|
repository = "https://github.com/cyborg42/epub2mdbook"
|
||||||
|
keywords = ["epub", "mdbook", "converter", "ebook"]
|
||||||
|
categories = ["command-line-utilities", "text-processing"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.96"
|
|
||||||
clap = { version = "4.5.30", features = ["derive"] }
|
clap = { version = "4.5.30", features = ["derive"] }
|
||||||
epub = "2.1.1"
|
epub = "2.1.1"
|
||||||
html2md = "0.2.15"
|
html2md = "0.2.15"
|
||||||
regex = "1.11.1"
|
regex = "1.11.1"
|
||||||
|
thiserror = "2.0.11"
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,8 @@ This is a powerful tool to convert EPUB files to MDBook format.
|
||||||
### CLI
|
### CLI
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -- --input-epub-path path/to/input.epub --output-dir path/to/output
|
cargo install epub2mdbook
|
||||||
|
epub2mdbook --input-epub path/to/input.epub --output-dir path/to/output
|
||||||
```
|
```
|
||||||
|
|
||||||
### Rust
|
### Rust
|
||||||
|
|
|
||||||
16
src/error.rs
Normal file
16
src/error.rs
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error("IO error: {0}")]
|
||||||
|
Io(#[from] std::io::Error),
|
||||||
|
|
||||||
|
#[error("EPUB error: {0}")]
|
||||||
|
Epub(#[from] epub::doc::DocError),
|
||||||
|
|
||||||
|
#[error("Invalid UTF-8: {0}")]
|
||||||
|
Utf8(#[from] std::string::FromUtf8Error),
|
||||||
|
|
||||||
|
#[error("{0} is not a file")]
|
||||||
|
NotAFile(String),
|
||||||
|
}
|
||||||
97
src/lib.rs
97
src/lib.rs
|
|
@ -1,18 +1,28 @@
|
||||||
|
pub mod error;
|
||||||
|
|
||||||
use epub::doc::{EpubDoc, NavPoint};
|
use epub::doc::{EpubDoc, NavPoint};
|
||||||
use html2md::parse_html;
|
use error::Error;
|
||||||
use regex::{Captures, Regex};
|
use regex::{Captures, Regex};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs;
|
|
||||||
use std::io::{Read, Seek};
|
use std::io::{Read, Seek};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::LazyLock;
|
||||||
|
use std::{fs, io};
|
||||||
|
|
||||||
|
/// Convert an EPUB file to an MDBook
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `epub_path` - The path to the EPUB file
|
||||||
|
/// * `output_dir` - The path to the output directory
|
||||||
|
///
|
||||||
pub fn convert_epub_to_mdbook(
|
pub fn convert_epub_to_mdbook(
|
||||||
epub_path: impl AsRef<Path>,
|
epub_path: impl AsRef<Path>,
|
||||||
output_dir: Option<impl AsRef<Path>>,
|
output_dir: Option<impl AsRef<Path>>,
|
||||||
) -> anyhow::Result<()> {
|
) -> Result<(), Error> {
|
||||||
let epub_path = epub_path.as_ref();
|
let epub_path = epub_path.as_ref();
|
||||||
if !epub_path.is_file() {
|
if !epub_path.is_file() {
|
||||||
return Err(anyhow::anyhow!("{} is not a file", epub_path.display()));
|
return Err(Error::NotAFile(epub_path.display().to_string()));
|
||||||
}
|
}
|
||||||
let book_name = epub_path.with_extension("");
|
let book_name = epub_path.with_extension("");
|
||||||
let book_name = book_name.file_name().unwrap().to_string_lossy().to_string();
|
let book_name = book_name.file_name().unwrap().to_string_lossy().to_string();
|
||||||
|
|
@ -31,7 +41,7 @@ pub fn convert_epub_to_mdbook(
|
||||||
};
|
};
|
||||||
let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned());
|
let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned());
|
||||||
|
|
||||||
let (toc, html_to_md) = toc_to_md(&doc, &title)?;
|
let (toc, html_to_md) = toc_to_md(&doc, &title);
|
||||||
fs::write(output_dir.join("src/SUMMARY.md"), toc)?;
|
fs::write(output_dir.join("src/SUMMARY.md"), toc)?;
|
||||||
|
|
||||||
extract_chapters_and_resources(&mut doc, &output_dir, &html_to_md)?;
|
extract_chapters_and_resources(&mut doc, &output_dir, &html_to_md)?;
|
||||||
|
|
@ -39,12 +49,12 @@ pub fn convert_epub_to_mdbook(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn nav_point_to_md(
|
fn nav_to_md(
|
||||||
nav: &NavPoint,
|
nav: &NavPoint,
|
||||||
indent: usize,
|
indent: usize,
|
||||||
html_files: &HashMap<PathBuf, PathBuf>,
|
html_to_md: &HashMap<PathBuf, PathBuf>,
|
||||||
) -> Option<String> {
|
) -> Option<String> {
|
||||||
let file = html_files.get(&nav.content)?;
|
let file = html_to_md.get(&nav.content)?;
|
||||||
let mut md = format!(
|
let mut md = format!(
|
||||||
"{}- [{}]({})\n",
|
"{}- [{}]({})\n",
|
||||||
" ".repeat(indent),
|
" ".repeat(indent),
|
||||||
|
|
@ -52,20 +62,31 @@ pub fn nav_point_to_md(
|
||||||
file.to_string_lossy()
|
file.to_string_lossy()
|
||||||
);
|
);
|
||||||
for child in &nav.children {
|
for child in &nav.children {
|
||||||
if let Some(child_md) = nav_point_to_md(child, indent + 1, html_files) {
|
if let Some(child_md) = nav_to_md(child, indent + 1, html_to_md) {
|
||||||
md.push_str(&child_md);
|
md.push_str(&child_md);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(md)
|
Some(md)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert the table of contents to SUMMARY.md
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `doc` - The EPUB document
|
||||||
|
/// * `title` - The title of the book
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// * `summary_md` - The SUMMARY.md content
|
||||||
|
/// * `html_to_md` - The file mapping from html to md
|
||||||
pub fn toc_to_md<R: Read + Seek>(
|
pub fn toc_to_md<R: Read + Seek>(
|
||||||
doc: &EpubDoc<R>,
|
doc: &EpubDoc<R>,
|
||||||
title: &str,
|
title: &str,
|
||||||
) -> anyhow::Result<(String, HashMap<PathBuf, PathBuf>)> {
|
) -> (String, HashMap<PathBuf, PathBuf>) {
|
||||||
let toc = doc.toc.clone();
|
let toc = doc.toc.clone();
|
||||||
|
|
||||||
let mut markdown = format!("# {}\n\n", title);
|
let mut summary_md = format!("# {}\n\n", title);
|
||||||
let html_to_md = doc
|
let html_to_md = doc
|
||||||
.resources
|
.resources
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -73,48 +94,57 @@ pub fn toc_to_md<R: Read + Seek>(
|
||||||
.map(|(_, (path, _))| (path.clone(), path.with_extension("md")))
|
.map(|(_, (path, _))| (path.clone(), path.with_extension("md")))
|
||||||
.collect::<HashMap<PathBuf, PathBuf>>();
|
.collect::<HashMap<PathBuf, PathBuf>>();
|
||||||
for nav in toc {
|
for nav in toc {
|
||||||
if let Some(md) = nav_point_to_md(&nav, 0, &html_to_md) {
|
if let Some(md) = nav_to_md(&nav, 0, &html_to_md) {
|
||||||
markdown.push_str(&md);
|
summary_md.push_str(&md);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok((markdown, html_to_md))
|
(summary_md, html_to_md)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn extract_chapters_and_resources<R: Read + Seek>(
|
/// Capture the `{link}` without `#`, eg:
|
||||||
|
/// ```
|
||||||
|
/// [ABC]({abc.html}#xxx)
|
||||||
|
/// [ABC]({abc.html})
|
||||||
|
/// ```
|
||||||
|
static LINK_REGEX: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r#"\[[^\]]+\]\(([^#)]+)(?:#[^)]+)?\)"#).unwrap());
|
||||||
|
|
||||||
|
fn extract_chapters_and_resources<R: Read + Seek>(
|
||||||
doc: &mut EpubDoc<R>,
|
doc: &mut EpubDoc<R>,
|
||||||
output_dir: impl AsRef<Path>,
|
output_dir: impl AsRef<Path>,
|
||||||
html_to_md: &HashMap<PathBuf, PathBuf>,
|
html_to_md: &HashMap<PathBuf, PathBuf>,
|
||||||
) -> anyhow::Result<()> {
|
) -> Result<(), Error> {
|
||||||
let output_dir = output_dir.as_ref();
|
let output_dir = output_dir.as_ref();
|
||||||
let src_dir = output_dir.join("src");
|
let src_dir = output_dir.join("src");
|
||||||
let re = Regex::new(r#"\[[^\]]+\]\(([^)]+)\)"#).unwrap(); // [abc](abc.html)
|
|
||||||
for (_, (path, _)) in doc.resources.clone().into_iter() {
|
for (_, (path, _)) in doc.resources.clone().into_iter() {
|
||||||
let content = match doc.get_resource_by_path(&path) {
|
let content = match doc.get_resource_by_path(&path) {
|
||||||
Some(content) => content,
|
Some(content) => content,
|
||||||
None => continue,
|
None => continue, // unreachable
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(path) = html_to_md.get(&path) {
|
if let Some(path) = html_to_md.get(&path) {
|
||||||
|
// html file, convert to md
|
||||||
let target_path = src_dir.join(path);
|
let target_path = src_dir.join(path);
|
||||||
if let Some(parent) = target_path.parent() {
|
if let Some(parent) = target_path.parent() {
|
||||||
fs::create_dir_all(parent)?;
|
fs::create_dir_all(parent)?;
|
||||||
}
|
}
|
||||||
let html = String::from_utf8(content)?;
|
let html = String::from_utf8(content)?;
|
||||||
let markdown = parse_html(&html);
|
let markdown = LINK_REGEX
|
||||||
let markdown = re
|
.replace_all(&html2md::parse_html(&html), |caps: &Captures| {
|
||||||
.replace_all(&markdown, |caps: &Captures| {
|
// replace [ABC](abc.html#xxx) to [ABC](abc.md#xxx)
|
||||||
let link = caps[1].to_string();
|
let origin = &caps[0];
|
||||||
let ori = caps[0].to_string();
|
let link = &caps[1];
|
||||||
if let Some(md_path) = html_to_md.get(&PathBuf::from(&link)) {
|
if let Some(md_path) = html_to_md.get(&PathBuf::from(link)) {
|
||||||
let md_path = md_path.to_string_lossy().to_string();
|
let md_path = md_path.to_string_lossy().to_string();
|
||||||
ori.replace(&link, &md_path)
|
origin.replace(link, &md_path)
|
||||||
} else {
|
} else {
|
||||||
ori
|
origin.to_string()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.to_string();
|
.replace(r"![]()", "")
|
||||||
|
.replace(r"[]()", "");
|
||||||
fs::write(target_path, markdown)?;
|
fs::write(target_path, markdown)?;
|
||||||
} else {
|
} else {
|
||||||
|
// other file, just copy
|
||||||
let target_path = src_dir.join(&path);
|
let target_path = src_dir.join(&path);
|
||||||
if let Some(parent) = target_path.parent() {
|
if let Some(parent) = target_path.parent() {
|
||||||
fs::create_dir_all(parent)?;
|
fs::create_dir_all(parent)?;
|
||||||
|
|
@ -125,11 +155,11 @@ pub fn extract_chapters_and_resources<R: Read + Seek>(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_book_toml(
|
fn write_book_toml(
|
||||||
output_dir: impl AsRef<Path>,
|
output_dir: impl AsRef<Path>,
|
||||||
title: &str,
|
title: &str,
|
||||||
creator: Option<String>,
|
creator: Option<String>,
|
||||||
) -> anyhow::Result<()> {
|
) -> io::Result<()> {
|
||||||
let output_dir = output_dir.as_ref();
|
let output_dir = output_dir.as_ref();
|
||||||
let creator = match creator {
|
let creator = match creator {
|
||||||
Some(creator) => format!("author = \"{creator}\"\n"),
|
Some(creator) => format!("author = \"{creator}\"\n"),
|
||||||
|
|
@ -145,12 +175,11 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
#[test]
|
#[test]
|
||||||
fn test_replace_links() {
|
fn test_replace_links() {
|
||||||
let markdown = r"[hello](hello.html)";
|
let markdown = r"[hello](hello.html#xxx) [hi](hi.xhtml)";
|
||||||
let re = Regex::new(r#"\[[^\]]+\]\(([^)]+)\)"#).unwrap();
|
let markdown = LINK_REGEX.replace_all(&markdown, |caps: &Captures| {
|
||||||
let markdown = re.replace_all(&markdown, |caps: &Captures| {
|
|
||||||
let link = caps[1].to_string();
|
let link = caps[1].to_string();
|
||||||
caps[0].replace(&link, "hello.md")
|
caps[0].replace(&link, "hello.md")
|
||||||
});
|
});
|
||||||
assert_eq!(markdown, "[hello](hello.md)");
|
assert_eq!(markdown, "[hello](hello.md#xxx) [hi](hello.md)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use epub2mdbook::convert_epub_to_mdbook;
|
use epub2mdbook::{convert_epub_to_mdbook, error::Error};
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
struct Args {
|
struct Args {
|
||||||
|
|
@ -13,7 +13,7 @@ struct Args {
|
||||||
output_dir: Option<PathBuf>,
|
output_dir: Option<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> anyhow::Result<()> {
|
fn main() -> Result<(), Error> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
convert_epub_to_mdbook(args.input_epub, args.output_dir)?;
|
convert_epub_to_mdbook(args.input_epub, args.output_dir)?;
|
||||||
println!("Conversion completed successfully!");
|
println!("Conversion completed successfully!");
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue