rm dependence url

This commit is contained in:
Maverick Liu 2025-02-22 18:29:12 +08:00
parent b8a8b834f6
commit 62a7985873
3 changed files with 16 additions and 304 deletions

294
Cargo.lock generated
View file

@ -214,14 +214,13 @@ dependencies = [
[[package]] [[package]]
name = "epub2mdbook" name = "epub2mdbook"
version = "0.4.0" version = "0.5.0"
dependencies = [ dependencies = [
"clap", "clap",
"epub", "epub",
"html2md", "html2md",
"regex", "regex",
"thiserror 2.0.11", "thiserror 2.0.11",
"url",
] ]
[[package]] [[package]]
@ -240,15 +239,6 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
]
[[package]] [[package]]
name = "futf" name = "futf"
version = "0.1.5" version = "0.1.5"
@ -299,145 +289,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "idna"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.7.1" version = "2.7.1"
@ -486,12 +337,6 @@ version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "litemap"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.12" version = "0.4.12"
@ -548,9 +393,9 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.8.4" version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
dependencies = [ dependencies = [
"adler2", "adler2",
] ]
@ -705,9 +550,9 @@ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.5.8" version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
dependencies = [ dependencies = [
"bitflags", "bitflags",
] ]
@ -788,12 +633,6 @@ version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]] [[package]]
name = "string_cache" name = "string_cache"
version = "0.8.8" version = "0.8.8"
@ -836,17 +675,6 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "tendril" name = "tendril"
version = "0.4.3" version = "0.4.3"
@ -898,16 +726,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]] [[package]]
name = "toml_datetime" name = "toml_datetime"
version = "0.6.8" version = "0.6.8"
@ -931,35 +749,12 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
[[package]]
name = "url"
version = "2.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]] [[package]]
name = "utf-8" name = "utf-8"
version = "0.7.6" version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]] [[package]]
name = "utf8parse" name = "utf8parse"
version = "0.2.2" version = "0.2.2"
@ -1067,18 +862,6 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]] [[package]]
name = "xml-rs" name = "xml-rs"
version = "0.8.25" version = "0.8.25"
@ -1096,73 +879,6 @@ dependencies = [
"markup5ever", "markup5ever",
] ]
[[package]]
name = "yoke"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerofrom"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerovec"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "zip" name = "zip"
version = "1.1.4" version = "1.1.4"

View file

@ -1,6 +1,6 @@
[package] [package]
name = "epub2mdbook" name = "epub2mdbook"
version = "0.4.0" version = "0.5.0"
edition = "2024" edition = "2024"
description = "A tool to convert EPUB files to MDBook format" description = "A tool to convert EPUB files to MDBook format"
authors = ["Maverick Liu <maverick.liu42@gmail.com>"] authors = ["Maverick Liu <maverick.liu42@gmail.com>"]
@ -15,4 +15,3 @@ epub = "2.1.1"
html2md = "0.2.15" html2md = "0.2.15"
regex = "1.11.1" regex = "1.11.1"
thiserror = "2.0.11" thiserror = "2.0.11"
url = "2.5.4"

View file

@ -123,7 +123,7 @@ fn extract_chapters_and_resources<R: Read + Seek>(
if let Some(parent) = target_path.parent() { if let Some(parent) = target_path.parent() {
fs::create_dir_all(parent)?; fs::create_dir_all(parent)?;
} }
let html = String::from_utf8(content.clone())?; let html = String::from_utf8(content)?;
let markdown = html2md::parse_html(&html); let markdown = html2md::parse_html(&html);
let markdown = post_process_md(&markdown, html_to_md); let markdown = post_process_md(&markdown, html_to_md);
fs::write(target_path, markdown)?; fs::write(target_path, markdown)?;
@ -144,28 +144,25 @@ fn extract_chapters_and_resources<R: Read + Seek>(
/// [ABC]({abc.html}#xxx) /// [ABC]({abc.html}#xxx)
/// [ABC]({abc.html}) /// [ABC]({abc.html})
/// ``` /// ```
static LINK_REGEX: LazyLock<Regex> = static LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\[[^\]]+\]\(([^#)]+)(?:#[^)]+)?\)"#).expect("unreachable")); LazyLock::new(|| Regex::new(r#"\[[^\]]+\]\(([^#)]+)(?:#[^)]+)?\)"#).expect("unreachable"));
static EMPTY_LINK: LazyLock<Regex> =
/// Capture the empty links, eg:
/// ```
/// [{ABC}]()
/// ```
static EMPTY_LINK_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\[([^\]]+)\]\(\)"#).expect("unreachable")); LazyLock::new(|| Regex::new(r#"\[([^\]]+)\]\(\)"#).expect("unreachable"));
static URL_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").expect("unreachable"));
fn post_process_md(markdown: &str, html_to_md: &HashMap<PathBuf, PathBuf>) -> String { fn post_process_md(markdown: &str, html_to_md: &HashMap<PathBuf, PathBuf>) -> String {
let file_name_map = html_to_md let file_name_map = html_to_md
.iter() .iter()
.filter_map(|(k, v)| Some((k.file_name()?, v.file_name()?))) .filter_map(|(k, v)| Some((k.file_name()?, v.file_name()?)))
.collect::<HashMap<_, _>>(); .collect::<HashMap<_, _>>();
let markdown = LINK_REGEX let markdown = LINK
.replace_all(markdown, |caps: &Captures| { .replace_all(markdown, |caps: &Captures| {
// replace [ABC](abc.html#xxx) to [ABC](abc.md#xxx) // replace [ABC](abc.html#xxx) to [ABC](abc.md#xxx)
let origin = &caps[0]; let origin = &caps[0];
let link = &caps[1]; let link = &caps[1];
if url::Url::parse(link).is_ok() { // Don't modify links with schemes like `https`.
if URL_LINK.is_match(link) {
return origin.to_string(); return origin.to_string();
} }
let link = match Path::new(&link).file_name() { let link = match Path::new(&link).file_name() {
@ -184,7 +181,7 @@ fn post_process_md(markdown: &str, html_to_md: &HashMap<PathBuf, PathBuf>) -> St
.replace(r"![]()", "") .replace(r"![]()", "")
.replace(r"[]()", ""); .replace(r"[]()", "");
EMPTY_LINK_REGEX EMPTY_LINK
.replace_all(&markdown, |caps: &Captures| caps[1].to_string()) .replace_all(&markdown, |caps: &Captures| caps[1].to_string())
.to_string() .to_string()
} }
@ -210,7 +207,7 @@ mod tests {
#[test] #[test]
fn test_replace_links() { fn test_replace_links() {
let markdown = r"[hello](hello.html#xxx) [hi](hi.xhtml)"; let markdown = r"[hello](hello.html#xxx) [hi](hi.xhtml)";
let markdown = LINK_REGEX.replace_all(&markdown, |caps: &Captures| { let markdown = LINK.replace_all(&markdown, |caps: &Captures| {
let link = caps[1].to_string(); let link = caps[1].to_string();
caps[0].replace(&link, "hello.md") caps[0].replace(&link, "hello.md")
}); });