{"id":14405375,"name":"html_docs_crawler","ecosystem":"pypi","description":"Universal documentation crawler that converts HTML pages to Markdown with internal link correction","homepage":"https://github.com/zwidny/doc_crawler","licenses":null,"normalized_licenses":[],"repository_url":"https://github.com/zwidny/doc_crawler","keywords_array":["scrapy","crawler","markdown","documentation","web-scraping"],"namespace":null,"versions_count":4,"first_release_published_at":"2026-05-06T09:07:12.000Z","latest_release_published_at":"2026-06-03T03:08:48.000Z","latest_release_number":"0.2.0","last_synced_at":"2026-06-16T19:05:13.263Z","created_at":"2026-05-08T11:29:23.140Z","updated_at":"2026-06-16T19:08:31.736Z","registry_url":"https://pypi.org/project/html_docs_crawler/","install_command":"pip install html_docs_crawler --index-url https://pypi.org/simple","documentation_url":"https://html_docs_crawler.readthedocs.io/","metadata":{"funding":null,"documentation":null,"classifiers":[],"normalized_name":"html-docs-crawler","project_status":null},"repo_metadata":{"id":356024421,"uuid":"1230693986","full_name":"zwidny/doc_crawler","owner":"zwidny","description":"A Scrapy-based universal documentation crawler that converts HTML documentation sites to Markdown format, with automatic internal link rewriting to local `.md` relative paths. Supports multiple converter engines (markitdown / html2text), path whitelist filtering, and automatic media file download.","archived":false,"fork":false,"pushed_at":"2026-06-03T03:07:26.000Z","size":212,"stargazers_count":0,"open_issues_count":0,"forks_count":0,"subscribers_count":0,"default_branch":"master","last_synced_at":"2026-06-03T05:07:36.594Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/zwidny.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":"AGENTS.md","dco":null,"cla":null}},"created_at":"2026-05-06T08:27:31.000Z","updated_at":"2026-06-03T03:07:30.000Z","dependencies_parsed_at":null,"dependency_job_id":null,"html_url":"https://github.com/zwidny/doc_crawler","commit_stats":null,"previous_names":["zwidny/doc_crawler"],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/zwidny/doc_crawler","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/zwidny%2Fdoc_crawler","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/zwidny%2Fdoc_crawler/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/zwidny%2Fdoc_crawler/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/zwidny%2Fdoc_crawler/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/zwidny","download_url":"https://codeload.github.com/zwidny/doc_crawler/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/zwidny%2Fdoc_crawler/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":34419571,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-05-26T15:22:16.424Z","status":"online","status_checked_at":"2026-06-16T02:00:06.860Z","response_time":126,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"repo_metadata_updated_at":"2026-06-16T19:08:31.736Z","dependent_packages_count":0,"downloads":120,"downloads_period":"last-month","dependent_repos_count":0,"rankings":{"downloads":26.19235487031955,"dependent_repos_count":41.04444956509393,"dependent_packages_count":7.2559603196216305,"stargazers_count":null,"forks_count":null,"docker_downloads_count":null,"average":24.830921585011705},"purl":"pkg:pypi/html-docs-crawler","advisories":[],"docker_usage_url":"https://docker.ecosyste.ms/usage/pypi/html_docs_crawler","docker_dependents_count":null,"docker_downloads_count":null,"usage_url":"https://repos.ecosyste.ms/usage/pypi/html_docs_crawler","dependent_repositories_url":"https://repos.ecosyste.ms/api/v1/usage/pypi/html_docs_crawler/dependencies","status":null,"funding_links":[],"critical":null,"issue_metadata":null,"versions_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/html_docs_crawler/versions","version_numbers_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/html_docs_crawler/version_numbers","latest_version_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/html_docs_crawler/latest_version","dependent_packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/html_docs_crawler/dependent_packages","related_packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/html_docs_crawler/related_packages","codemeta_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/html_docs_crawler/codemeta","maintainers":[{"uuid":"zwidny","login":"zwidny","name":null,"email":null,"url":null,"packages_count":2,"html_url":"https://pypi.org/user/zwidny/","role":"Owner","created_at":"2026-05-08T12:44:14.625Z","updated_at":"2026-05-08T12:44:14.625Z","packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/maintainers/zwidny/packages"}]}