Mirror of https://github.com/tuna/tunasync-scripts.git
Synced 2025-12-25 16:32:47 +00:00

Commit 98fa39a31b (parent 61d85ce2e0)

shadowmire: update to taoky/shadowmire@50432cda2a [ci skip]

Signed-off-by: Shengqi Chen <harry-chen@outlook.com>

Changed file: shadowmire.py (136 lines changed)
@@ -36,6 +36,9 @@ logger = logging.getLogger("shadowmire")
 USER_AGENT = "Shadowmire (https://github.com/taoky/shadowmire)"
+LOCAL_DB_NAME = "local.db"
+LOCAL_JSON_NAME = "local.json"
+LOCAL_DB_SERIAL_NAME = "local.db.serial"

 # Note that it's suggested to use only 3 workers for PyPI.
 WORKERS = int(os.environ.get("SHADOWMIRE_WORKERS", "3"))
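
The new LOCAL_DB_SERIAL_NAME sidecar ends up holding a single integer (written by finalize() further down). A minimal sketch of how a downstream consumer could read it; the helper name is made up for illustration:

    from pathlib import Path

    def read_local_serial(basedir: Path) -> int:
        """Read the last-serial sidecar written next to local.db; -1 if unavailable."""
        try:
            return int((basedir / "local.db.serial").read_text().strip())
        except (FileNotFoundError, ValueError):
            return -1
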
@@ -450,6 +453,7 @@ class PyPI:
             "files": [],
             "meta": {
                 "api-version": "1.1",
+                # not required by PEP691, but bandersnatch has it
                 "_last-serial": str(package_meta["last_serial"]),
             },
             "name": package_meta["info"]["name"],
@@ -486,6 +490,7 @@ ShadowmirePackageItem = tuple[str, int]
 class Plan:
     remove: list[str]
     update: list[str]
+    remote_last_serial: int


 def match_patterns(
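
Assuming Plan is a dataclass (its decorator sits outside this hunk), the new field serializes into plan.json the same way the existing ones do, via the json.dump(plan, f, default=vars, ...) call in the sync command further down. Illustrative values only:

    import json
    from dataclasses import dataclass

    @dataclass
    class Plan:
        remove: list[str]
        update: list[str]
        remote_last_serial: int

    plan = Plan(remove=["dead-pkg"], update=["requests"], remote_last_serial=12345678)
    print(json.dumps(plan, default=vars))
    # {"remove": ["dead-pkg"], "update": ["requests"], "remote_last_serial": 12345678}
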
@@ -535,15 +540,15 @@ class SyncBase:
         """
         local should NOT skip invalid (-1) serials
         """
-        remote = self.fetch_remote_versions()
+        remote_sn, remote_pkgs = self.fetch_remote_versions()
-        remote = self.filter_remote_with_excludes(remote, excludes)
+        remote_pkgs = self.filter_remote_with_excludes(remote_pkgs, excludes)
         with open(self.basedir / "remote_excluded.json", "w") as f:
-            json.dump(remote, f)
+            json.dump(remote_pkgs, f)

         to_remove = []
         to_update = []
         local_keys = set(local.keys())
-        remote_keys = set(remote.keys())
+        remote_keys = set(remote_pkgs.keys())
         for i in local_keys - remote_keys:
             to_remove.append(i)
             local_keys.remove(i)
@@ -566,17 +571,21 @@ class SyncBase:
             to_update.append(i)
         for i in local_keys:
             local_serial = local[i]
-            remote_serial = remote[i]
+            remote_serial = remote_pkgs[i]
             if local_serial != remote_serial:
                 if local_serial == -1:
                     logger.info("skip %s, as it's marked as not exist at upstream", i)
                     to_remove.append(i)
                 else:
                     to_update.append(i)
-        output = Plan(remove=to_remove, update=to_update)
+        output = Plan(remove=to_remove, update=to_update, remote_last_serial=remote_sn)
         return output

-    def fetch_remote_versions(self) -> dict[str, int]:
+    def fetch_remote_versions(self) -> tuple[int, dict[str, int]]:
+        # returns (last_serial, {package_name: serial, ...})
+        raise NotImplementedError
+
+    def get_package_metadata(self, package_name: str) -> dict:
         raise NotImplementedError

     def check_and_update(
@@ -623,7 +632,9 @@ class SyncBase:
         try:
             with open(json_meta_path, "r") as f:
                 meta = json.load(f)
-            meta_filters(meta, package_name, prerelease_excludes, excluded_wheel_filenames)
+            meta_filters(
+                meta, package_name, prerelease_excludes, excluded_wheel_filenames
+            )
             release_files = PyPI.get_release_files_from_meta(meta)
             hrefs_from_meta = {
                 PyPI.file_url_to_local_url(i["url"]) for i in release_files
@@ -811,12 +822,12 @@ class SyncBase:
             index_html_path.unlink()
         index_html_path.symlink_to("index.v1_html")

-    def finalize(self) -> None:
+    def finalize(self, index_serial: int) -> None:
         local_names = self.local_db.keys()
-        # generate index.html at basedir
+        # generate v1_html index
-        index_path = self.basedir / "simple" / "index.html"
+        v1_html_index_path = self.basedir / "simple" / "index.v1_html"
         # modified from bandersnatch
-        with overwrite(index_path) as f:
+        with overwrite(v1_html_index_path) as f:
             f.write("<!DOCTYPE html>\n")
             f.write("<html>\n")
             f.write(" <head>\n")
@@ -830,6 +841,25 @@ class SyncBase:
             # We're really trusty that this is all encoded in UTF-8. :/
             f.write(f' <a href="{pkg}/">{pkg}</a><br/>\n')
         f.write(" </body>\n</html>")
+        # always link index.html to index.v1_html
+        html_simple_path = self.basedir / "simple" / "index.html"
+        if not html_simple_path.is_symlink():
+            html_simple_path.unlink(missing_ok=True)
+            html_simple_path.symlink_to("index.v1_html")
+
+        # generate v1_json index and local.db{,.serial} for downstream use
+        v1_json_index_path = self.basedir / "simple" / "index.v1_json"
+        with overwrite(v1_json_index_path) as f:
+            index_json: dict[str, Any] = {
+                "meta": {
+                    "api-version": "1.1",
+                    "_last-serial": index_serial,
+                },
+                "projects": [{"name": n} for n in sorted(local_names)],
+            }
+            json.dump(index_json, f)
+        with overwrite(self.basedir / LOCAL_DB_SERIAL_NAME) as f:
+            f.write(str(index_serial))
         self.local_db.dump_json()

     def skip_this_package(self, i: dict, dest: Path) -> bool:
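
A rough sketch of reading the generated simple/index.v1_json from the downstream side; the field layout follows the code above, while the helper name is hypothetical:

    import json
    from pathlib import Path

    def load_project_list(basedir: Path) -> tuple[int, list[str]]:
        """Return (last serial, project names) from the v1_json index."""
        with open(basedir / "simple" / "index.v1_json") as f:
            index = json.load(f)
        return index["meta"]["_last-serial"], [p["name"] for p in index["projects"]]
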
@@ -933,14 +963,17 @@ class SyncPyPI(SyncBase):
         self.remote_packages: Optional[dict[str, int]] = None
         super().__init__(basedir, local_db, sync_packages)

-    def fetch_remote_versions(self) -> dict[str, int]:
+    def fetch_remote_versions(self) -> tuple[int, dict[str, int]]:
         self.last_serial = self.pypi.changelog_last_serial()
         self.remote_packages = self.pypi.list_packages_with_serial()
         logger.info("Remote has %s packages", len(self.remote_packages))
         with overwrite(self.basedir / "remote.json") as f:
             json.dump(self.remote_packages, f)
         logger.info("File saved to remote.json.")
-        return self.remote_packages
+        return self.last_serial, self.remote_packages

+    def get_package_metadata(self, package_name: str) -> dict:
+        return self.pypi.get_package_metadata(package_name)
+
     def do_update(
         self,
@@ -953,7 +986,7 @@ class SyncPyPI(SyncBase):
         package_simple_path = self.simple_dir / package_name
         package_simple_path.mkdir(exist_ok=True)
         try:
-            meta = self.pypi.get_package_metadata(package_name)
+            meta = self.get_package_metadata(package_name)
             meta_original = deepcopy(meta)
             logger.debug("%s meta: %s", package_name, meta)
         except PackageNotFoundError:
@@ -999,9 +1032,7 @@ class SyncPyPI(SyncBase):
         existing_hrefs = [] if existing_hrefs is None else existing_hrefs
         release_files = PyPI.get_release_files_from_meta(meta)
         # remove packages that no longer exist remotely
-        remote_hrefs = [
-            PyPI.file_url_to_local_url(i["url"]) for i in release_files
-        ]
+        remote_hrefs = [PyPI.file_url_to_local_url(i["url"]) for i in release_files]
         should_remove = list(set(existing_hrefs) - set(remote_hrefs))
         for href in should_remove:
             p = unquote(href)
@@ -1060,20 +1091,49 @@ class SyncPlainHTTP(SyncBase):
         self.pypi = None
         super().__init__(basedir, local_db, sync_packages)

-    def fetch_remote_versions(self) -> dict[str, int]:
+    def fetch_remote_versions(self) -> tuple[int, dict[str, int]]:
-        remote: dict[str, int]
+        remote_pkgs: dict[str, int]
         if not self.pypi:
-            remote_url = urljoin(self.upstream, "local.json")
+            remote_pkg_db_url = urljoin(self.upstream, LOCAL_JSON_NAME)
-            resp = self.session.get(remote_url)
+            resp = self.session.get(remote_pkg_db_url)
             resp.raise_for_status()
-            remote = resp.json()
+            remote_pkgs = resp.json()
+            # first fallback to max serial in remote_pkgs
+            serial = max(remote_pkgs.values()) if remote_pkgs else -1
+            # then try to get last serial from remote
+            remote_last_serial_url = urljoin(self.upstream, LOCAL_DB_SERIAL_NAME)
+            try:
+                resp = self.session.get(remote_last_serial_url)
+                resp.raise_for_status()
+                serial = int(resp.text.strip())
+            except (requests.RequestException, ValueError):
+                logger.warning(
+                    f"cannot get last_serial from upstream, fallback to max package serial in {LOCAL_JSON_NAME}",
+                    exc_info=True,
+                )
         else:
-            remote = self.pypi.list_packages_with_serial()
+            serial = self.pypi.changelog_last_serial()
-        logger.info("Remote has %s packages", len(remote))
+            remote_pkgs = self.pypi.list_packages_with_serial()
+        logger.info("Remote has %s packages", len(remote_pkgs))
         with overwrite(self.basedir / "remote.json") as f:
-            json.dump(remote, f)
+            json.dump(remote_pkgs, f)
         logger.info("File saved to remote.json.")
-        return remote
+        return serial, remote_pkgs

+    def get_package_metadata(self, package_name: str) -> dict:
+        file_url = urljoin(self.upstream, f"json/{package_name}")
+        success, resp = download(
+            self.session, file_url, self.jsonmeta_dir / (package_name + ".new")
+        )
+        if not success:
+            logger.error(
+                "download %s JSON meta fails with code %s",
+                package_name,
+                resp.status_code if resp else None,
+            )
+            raise PackageNotFoundError
+        assert resp
+        return resp.json()
+
     def do_update(
         self,
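
Stripped of the class context, the serial discovery above reduces to the following pattern (a standalone sketch using requests; the upstream URL handling and helper name are illustrative, not part of this commit):

    import requests
    from urllib.parse import urljoin

    def fetch_upstream_serial(session: requests.Session, upstream: str,
                              remote_pkgs: dict[str, int]) -> int:
        # fallback: the largest per-package serial found in local.json
        serial = max(remote_pkgs.values()) if remote_pkgs else -1
        try:
            # preferred: the dedicated local.db.serial sidecar
            resp = session.get(urljoin(upstream, "local.db.serial"))
            resp.raise_for_status()
            serial = int(resp.text.strip())
        except (requests.RequestException, ValueError):
            pass  # keep the fallback value
        return serial
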
@@ -1089,19 +1149,10 @@ class SyncPlainHTTP(SyncBase):
         hrefs = get_existing_hrefs(package_simple_path)
         existing_hrefs = [] if hrefs is None else hrefs
         # Download JSON meta
-        file_url = urljoin(self.upstream, f"json/{package_name}")
-        success, resp = download(
-            self.session, file_url, self.jsonmeta_dir / (package_name + ".new")
-        )
-        if not success:
-            logger.error(
-                "download %s JSON meta fails with code %s",
-                package_name,
-                resp.status_code if resp else None,
-            )
+        try:
+            meta = self.get_package_metadata(package_name)
+        except PackageNotFoundError:
             return None
-        assert resp
-        meta = resp.json()
         # filter prerelease and wheel files, if necessary
         meta_filters(meta, package_name, prerelease_excludes, excluded_wheel_filenames)
@@ -1264,8 +1315,7 @@ def cli(ctx: click.Context, repo: str) -> None:

     # Make sure basedir is absolute
     basedir = Path(repo).resolve()
-    local_db = LocalVersionKV(basedir / "local.db", basedir / "local.json")
+    local_db = LocalVersionKV(basedir / LOCAL_DB_NAME, basedir / LOCAL_JSON_NAME)

     ctx.obj["basedir"] = basedir
     ctx.obj["local_db"] = local_db
@@ -1323,7 +1373,7 @@ def sync(
     with overwrite(basedir / "plan.json") as f:
         json.dump(plan, f, default=vars, indent=2)
     success = syncer.do_sync_plan(plan, prerelease_excludes, excluded_wheel_filenames)
-    syncer.finalize()
+    syncer.finalize(plan.remote_last_serial)

     logger.info("Synchronization finished. Success: %s", success)
@@ -1488,7 +1538,7 @@ def verify(
         packages_pathcache,
         compare_size,
     )
-    syncer.finalize()
+    syncer.finalize(plan.remote_last_serial)

     logger.info(
         "====== Step 5. Remove any unreferenced files in `packages` folder ======"