mirror of https://github.com/cyborg42/epub2mdbook.git
synced 2026-02-10 16:24:50 -05:00

update

This commit is contained in:
parent d55060ab0f
commit 018d27b707

4 changed files with 25 additions and 21 deletions

Cargo.lock (generated): 2 changes

@@ -214,7 +214,7 @@ dependencies = [
 [[package]]
 name = "epub2mdbook"
-version = "0.6.0"
+version = "0.7.0"
 dependencies = [
  "clap",
  "epub",

Cargo.toml: 2 changes

@@ -1,6 +1,6 @@
 [package]
 name = "epub2mdbook"
-version = "0.6.0"
+version = "0.7.0"
 edition = "2024"
 description = "A tool to convert EPUB files to MDBook format"
 authors = ["Maverick Liu <maverick.liu42@gmail.com>"]

src/lib.rs: 40 changes

@@ -4,6 +4,7 @@ use epub::doc::{EpubDoc, NavPoint};
 use error::Error;
 use regex::{Captures, Regex};
 use std::collections::HashMap;
+use std::ffi::OsStr;
 use std::io::{Read, Seek};
 use std::path::{Path, PathBuf};
 use std::sync::LazyLock;
@@ -14,7 +15,7 @@ use std::{fs, io};
 /// # Arguments
 ///
 /// * `epub_path` - The path to the EPUB file
-/// * `output_dir` - The path to the output directory
+/// * `output_dir` - The path to the output directory, pwd by default
 ///
 pub fn convert_epub_to_mdbook(
     epub_path: impl AsRef<Path>,
@@ -37,11 +38,11 @@ pub fn convert_epub_to_mdbook(
     fs::create_dir_all(output_dir.join("src"))?;
 
     let mut doc = EpubDoc::new(epub_path)?;
-    let title = if let Some(title) = doc.metadata.get("title") {
-        title.first().cloned().unwrap_or(book_name)
-    } else {
-        book_name
-    };
+    let title = doc
+        .metadata
+        .get("title")
+        .and_then(|v| v.first().cloned())
+        .unwrap_or(book_name);
     let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned());
     let (toc, html_to_md) = toc_to_md(&doc, &title);
     extract_chapters_and_resources(&mut doc, &output_dir, &html_to_md)?;
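
The title lookup here is an Option chain: take the first "title" metadata entry if one exists, otherwise fall back to the book name. A minimal standalone sketch of the same pattern (function and variable names are illustrative, not from the crate):

use std::collections::HashMap;

// Sketch only: picks the first "title" value, falling back to a default.
fn pick_title(metadata: &HashMap<String, Vec<String>>, fallback: String) -> String {
    metadata
        .get("title")
        .and_then(|values| values.first().cloned())
        .unwrap_or(fallback)
}

fn main() {
    let mut metadata = HashMap::new();
    metadata.insert("title".to_string(), vec!["The Rust Book".to_string()]);
    assert_eq!(pick_title(&metadata, "fallback".into()), "The Rust Book");
    assert_eq!(pick_title(&HashMap::new(), "fallback".into()), "fallback");
}
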
@@ -107,6 +108,10 @@ fn extract_chapters_and_resources<R: Read + Seek>(
     output_dir: impl AsRef<Path>,
     html_to_md: &HashMap<PathBuf, PathBuf>,
 ) -> Result<(), Error> {
+    let file_name_map = html_to_md
+        .iter()
+        .filter_map(|(k, v)| Some((k.file_name()?, v.file_name()?)))
+        .collect::<HashMap<_, _>>();
     let output_dir = output_dir.as_ref();
     let src_dir = output_dir.join("src");
     for (_, (path, _)) in doc.resources.clone().into_iter() {
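
file_name_map is built once, up front, from the full-path mapping: it keeps only the file-name component of each HTML/Markdown pair, so link rewriting can look up "ch01.html" without knowing the chapter's directory. A small sketch of what the filter_map produces, using hypothetical paths:

use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::PathBuf;

fn main() {
    // Hypothetical contents of html_to_md.
    let html_to_md: HashMap<PathBuf, PathBuf> = HashMap::from([
        (PathBuf::from("OEBPS/ch01.html"), PathBuf::from("ch01.md")),
        (PathBuf::from("OEBPS/ch02.html"), PathBuf::from("ch02.md")),
    ]);

    // Same construction as in the diff: keep only the file-name components.
    let file_name_map: HashMap<&OsStr, &OsStr> = html_to_md
        .iter()
        .filter_map(|(k, v)| Some((k.file_name()?, v.file_name()?)))
        .collect();

    assert_eq!(
        file_name_map.get(OsStr::new("ch01.html")),
        Some(&OsStr::new("ch01.md"))
    );
}
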
@@ -116,13 +121,17 @@ fn extract_chapters_and_resources<R: Read + Seek>(
         };
         if let Some(md_path) = html_to_md.get(&path) {
             // html file, convert to md
-            let target_path = src_dir.join(md_path);
+            let target_path = if md_path == Path::new("SUMMARY.md") {
+                src_dir.join("_SUMMARY.md")
+            } else {
+                src_dir.join(md_path)
+            };
             if let Some(parent) = target_path.parent() {
                 fs::create_dir_all(parent)?;
             }
             let html = String::from_utf8(content)?;
             let markdown = html2md::parse_html(&html);
-            let markdown = post_process_md(&markdown, html_to_md);
+            let markdown = post_process_md(&markdown, &file_name_map);
             fs::write(target_path, markdown)?;
         } else {
             // other file, just copy
@@ -147,12 +156,7 @@ static EMPTY_LINK: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r#"\[([^\]]+)\]\(\)"#).expect("unreachable"));
 static URL_LINK: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").expect("unreachable"));
-fn post_process_md(markdown: &str, html_to_md: &HashMap<PathBuf, PathBuf>) -> String {
-    let file_name_map = html_to_md
-        .iter()
-        .filter_map(|(k, v)| Some((k.file_name()?, v.file_name()?)))
-        .collect::<HashMap<_, _>>();
-
+fn post_process_md(markdown: &str, file_name_map: &HashMap<&OsStr, &OsStr>) -> String {
     let markdown = LINK
         .replace_all(markdown, |caps: &Captures| {
             // replace [ABC](abc.html#xxx) to [ABC](abc.md#xxx)
@@ -162,14 +166,14 @@ fn post_process_md(markdown: &str, html_to_md: &HashMap<PathBuf, PathBuf>) -> St
             if URL_LINK.is_match(link) {
                 return origin.to_string();
             }
-            let link = match Path::new(&link).file_name() {
+            let html_file_name = match Path::new(&link).file_name() {
                 Some(link) => link,
                 None => return origin.to_string(),
             };
-            if let Some(md_path) = file_name_map.get(link) {
+            if let Some(md_file_name) = file_name_map.get(html_file_name) {
                 origin.replace(
-                    &link.to_string_lossy().to_string(),
-                    &md_path.to_string_lossy(),
+                    &html_file_name.to_string_lossy().to_string(),
+                    &md_file_name.to_string_lossy(),
                 )
             } else {
                 origin.to_string()
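
For context, the rewrite that post_process_md applies to intra-book links works roughly like the sketch below. The markdown-link regex here is a stand-in, since the crate's actual LINK pattern does not appear in this diff; only the URL_LINK scheme check above is taken from the source.

use regex::{Captures, Regex};

fn main() {
    // Stand-in pattern for markdown links; the real LINK static may differ.
    let link = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
    let markdown = "See [Chapter 1](ch01.html#intro) and [Rust](https://www.rust-lang.org).";

    let rewritten = link.replace_all(markdown, |caps: &Captures| {
        let origin = &caps[0];
        let target = &caps[2];
        // Scheme-prefixed URLs (http:, mailto:, ...) are left untouched,
        // mirroring the URL_LINK check in the diff.
        if target.contains(':') {
            return origin.to_string();
        }
        // .html targets are pointed at the converted .md file.
        origin.replace(".html", ".md")
    });

    assert_eq!(
        rewritten,
        "See [Chapter 1](ch01.md#intro) and [Rust](https://www.rust-lang.org)."
    );
}
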

src/main.rs: 2 changes

@@ -8,7 +8,7 @@ struct Args {
     /// The path to the input EPUB file
     #[clap(short, long)]
     input_epub: PathBuf,
-    /// The path to the output directory
+    /// The path to the output directory, pwd by default
     #[clap(short, long)]
     output_dir: Option<PathBuf>,
 }
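
With these field names and #[clap(short, long)], the derived CLI would typically be invoked as shown below; the flag names are assumed from clap's usual kebab-case derivation, not spelled out in this diff:

    epub2mdbook --input-epub book.epub --output-dir ./my-book
    epub2mdbook -i book.epub    # output_dir omitted: the current directory is used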