mirror of
https://github.com/cyborg42/epub2mdbook.git
synced 2026-02-10 16:24:50 -05:00
dev
This commit is contained in:
parent
749af017ca
commit
6bcc1d9682
4 changed files with 52 additions and 19 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -226,6 +226,7 @@ dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"epub",
|
"epub",
|
||||||
"html2md",
|
"html2md",
|
||||||
|
"regex",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
||||||
|
|
@ -8,3 +8,4 @@ anyhow = "1.0.96"
|
||||||
clap = { version = "4.5.30", features = ["derive"] }
|
clap = { version = "4.5.30", features = ["derive"] }
|
||||||
epub = "2.1.1"
|
epub = "2.1.1"
|
||||||
html2md = "0.2.15"
|
html2md = "0.2.15"
|
||||||
|
regex = "1.11.1"
|
||||||
|
|
|
||||||
65
src/lib.rs
65
src/lib.rs
|
|
@ -1,5 +1,6 @@
|
||||||
use epub::doc::{EpubDoc, NavPoint};
|
use epub::doc::{EpubDoc, NavPoint};
|
||||||
use html2md::parse_html;
|
use html2md::parse_html;
|
||||||
|
use regex::{Captures, Regex};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::{Read, Seek};
|
use std::io::{Read, Seek};
|
||||||
|
|
@ -9,28 +10,31 @@ pub fn convert_epub_to_mdbook(
|
||||||
epub_path: impl AsRef<Path>,
|
epub_path: impl AsRef<Path>,
|
||||||
output_dir: Option<impl AsRef<Path>>,
|
output_dir: Option<impl AsRef<Path>>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let book_name = epub_path.as_ref().with_extension("");
|
let epub_path = epub_path.as_ref();
|
||||||
let book_name = book_name.file_name().unwrap().to_str().unwrap();
|
if !epub_path.is_file() {
|
||||||
let pwd = PathBuf::from(".");
|
return Err(anyhow::anyhow!("{} is not a file", epub_path.display()));
|
||||||
|
}
|
||||||
|
let book_name = epub_path.with_extension("");
|
||||||
|
let book_name = book_name.file_name().unwrap().to_string_lossy().to_string();
|
||||||
let output_dir = match output_dir {
|
let output_dir = match output_dir {
|
||||||
Some(output_dir) => output_dir.as_ref().join(book_name),
|
Some(output_dir) => output_dir.as_ref().join(&book_name),
|
||||||
None => pwd.join(book_name),
|
None => PathBuf::from(".").join(&book_name),
|
||||||
};
|
};
|
||||||
|
|
||||||
fs::create_dir_all(output_dir.join("src"))?;
|
fs::create_dir_all(output_dir.join("src"))?;
|
||||||
|
|
||||||
let mut doc = EpubDoc::new(&epub_path)?;
|
let mut doc = EpubDoc::new(epub_path)?;
|
||||||
let title = if let Some(title) = doc.metadata.get("title") {
|
let title = if let Some(title) = doc.metadata.get("title") {
|
||||||
title.first().unwrap_or(&book_name.to_string()).clone()
|
title.first().cloned().unwrap_or(book_name)
|
||||||
} else {
|
} else {
|
||||||
book_name.to_string()
|
book_name
|
||||||
};
|
};
|
||||||
let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned());
|
let creator = doc.metadata.get("creator").and_then(|v| v.first().cloned());
|
||||||
|
|
||||||
let (toc, html_files) = toc_to_md(&doc, &title)?;
|
let (toc, html_to_md) = toc_to_md(&doc, &title)?;
|
||||||
fs::write(output_dir.join("src/SUMMARY.md"), toc)?;
|
fs::write(output_dir.join("src/SUMMARY.md"), toc)?;
|
||||||
|
|
||||||
extract_chapters_and_resources(&mut doc, &output_dir, &html_files)?;
|
extract_chapters_and_resources(&mut doc, &output_dir, &html_to_md)?;
|
||||||
write_book_toml(&output_dir, &title, creator)?;
|
write_book_toml(&output_dir, &title, creator)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
@ -45,7 +49,7 @@ pub fn nav_point_to_md(
|
||||||
"{}- [{}]({})\n",
|
"{}- [{}]({})\n",
|
||||||
" ".repeat(indent),
|
" ".repeat(indent),
|
||||||
nav.label,
|
nav.label,
|
||||||
file.to_str()?
|
file.to_string_lossy()
|
||||||
);
|
);
|
||||||
for child in &nav.children {
|
for child in &nav.children {
|
||||||
if let Some(child_md) = nav_point_to_md(child, indent + 1, html_files) {
|
if let Some(child_md) = nav_point_to_md(child, indent + 1, html_files) {
|
||||||
|
|
@ -62,41 +66,53 @@ pub fn toc_to_md<R: Read + Seek>(
|
||||||
let toc = doc.toc.clone();
|
let toc = doc.toc.clone();
|
||||||
|
|
||||||
let mut markdown = format!("# {}\n\n", title);
|
let mut markdown = format!("# {}\n\n", title);
|
||||||
let html_files = doc
|
let html_to_md = doc
|
||||||
.resources
|
.resources
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(_, (_, mime))| mime == "application/xhtml+xml")
|
.filter(|(_, (_, mime))| mime == "application/xhtml+xml")
|
||||||
.map(|(_, (path, _))| (path.clone(), path.with_extension("md")))
|
.map(|(_, (path, _))| (path.clone(), path.with_extension("md")))
|
||||||
.collect::<HashMap<PathBuf, PathBuf>>();
|
.collect::<HashMap<PathBuf, PathBuf>>();
|
||||||
for nav in toc {
|
for nav in toc {
|
||||||
if let Some(md) = nav_point_to_md(&nav, 0, &html_files) {
|
if let Some(md) = nav_point_to_md(&nav, 0, &html_to_md) {
|
||||||
markdown.push_str(&md);
|
markdown.push_str(&md);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok((markdown, html_files))
|
Ok((markdown, html_to_md))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn extract_chapters_and_resources<R: Read + Seek>(
|
pub fn extract_chapters_and_resources<R: Read + Seek>(
|
||||||
doc: &mut EpubDoc<R>,
|
doc: &mut EpubDoc<R>,
|
||||||
output_dir: impl AsRef<Path>,
|
output_dir: impl AsRef<Path>,
|
||||||
html_files: &HashMap<PathBuf, PathBuf>,
|
html_to_md: &HashMap<PathBuf, PathBuf>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let output_dir = output_dir.as_ref();
|
let output_dir = output_dir.as_ref();
|
||||||
let src_dir = output_dir.join("src");
|
let src_dir = output_dir.join("src");
|
||||||
|
let re = Regex::new(r#"\[[^\]]+\]\(([^)]+)\)"#).unwrap(); // [abc](abc.html)
|
||||||
for (_, (path, _)) in doc.resources.clone().into_iter() {
|
for (_, (path, _)) in doc.resources.clone().into_iter() {
|
||||||
let content = match doc.get_resource_by_path(&path) {
|
let content = match doc.get_resource_by_path(&path) {
|
||||||
Some(content) => content,
|
Some(content) => content,
|
||||||
None => continue,
|
None => continue,
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(path) = html_files.get(&path) {
|
if let Some(path) = html_to_md.get(&path) {
|
||||||
let target_path = src_dir.join(path);
|
let target_path = src_dir.join(path);
|
||||||
if let Some(parent) = target_path.parent() {
|
if let Some(parent) = target_path.parent() {
|
||||||
fs::create_dir_all(parent)?;
|
fs::create_dir_all(parent)?;
|
||||||
}
|
}
|
||||||
let html = String::from_utf8(content)?;
|
let html = String::from_utf8(content)?;
|
||||||
let markdown = parse_html(&html);
|
let markdown = parse_html(&html);
|
||||||
|
let markdown = re
|
||||||
|
.replace_all(&markdown, |caps: &Captures| {
|
||||||
|
let link = caps[1].to_string();
|
||||||
|
let ori = caps[0].to_string();
|
||||||
|
if let Some(md_path) = html_to_md.get(&PathBuf::from(&link)) {
|
||||||
|
let md_path = md_path.to_string_lossy().to_string();
|
||||||
|
ori.replace(&link, &md_path)
|
||||||
|
} else {
|
||||||
|
ori
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.to_string();
|
||||||
fs::write(target_path, markdown)?;
|
fs::write(target_path, markdown)?;
|
||||||
} else {
|
} else {
|
||||||
let target_path = src_dir.join(&path);
|
let target_path = src_dir.join(&path);
|
||||||
|
|
@ -123,3 +139,18 @@ pub fn write_book_toml(
|
||||||
fs::write(output_dir.join("book.toml"), toml_content)?;
|
fs::write(output_dir.join("book.toml"), toml_content)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
#[test]
|
||||||
|
fn test_replace_links() {
|
||||||
|
let markdown = r"[hello](hello.html)";
|
||||||
|
let re = Regex::new(r#"\[[^\]]+\]\(([^)]+)\)"#).unwrap();
|
||||||
|
let markdown = re.replace_all(&markdown, |caps: &Captures| {
|
||||||
|
let link = caps[1].to_string();
|
||||||
|
caps[0].replace(&link, "hello.md")
|
||||||
|
});
|
||||||
|
assert_eq!(markdown, "[hello](hello.md)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ use epub2mdbook::convert_epub_to_mdbook;
|
||||||
struct Args {
|
struct Args {
|
||||||
/// The path to the input EPUB file
|
/// The path to the input EPUB file
|
||||||
#[clap(short, long)]
|
#[clap(short, long)]
|
||||||
input_epub_path: PathBuf,
|
input_epub: PathBuf,
|
||||||
/// The path to the output directory
|
/// The path to the output directory
|
||||||
#[clap(short, long)]
|
#[clap(short, long)]
|
||||||
output_dir: Option<PathBuf>,
|
output_dir: Option<PathBuf>,
|
||||||
|
|
@ -15,7 +15,7 @@ struct Args {
|
||||||
|
|
||||||
fn main() -> anyhow::Result<()> {
|
fn main() -> anyhow::Result<()> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
convert_epub_to_mdbook(args.input_epub_path, args.output_dir)?;
|
convert_epub_to_mdbook(args.input_epub, args.output_dir)?;
|
||||||
println!("Conversion completed successfully!");
|
println!("Conversion completed successfully!");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue