Merge branch 'rotary2024' into qa

This commit is contained in:
Barbara Miller 2024-04-19 14:02:26 -07:00
commit 0485d9c83a

View File

@@ -25,12 +25,13 @@ import tempfile
import urlcanon
import os
import json
import doublethink
import datetime
from cassandra import ReadTimeout
from cassandra.cluster import Cluster
import threading
import traceback
import doublethink
thread_local = threading.local()
@@ -230,23 +231,26 @@ def _build_youtube_dl(worker, destdir, site, page):
# transfer, which warcprox currently rejects
extra_headers = dict(site.extra_headers())
extra_headers["content-length"] = size
request, response = worker._warcprox_write_record(
warcprox_address=worker._proxy_for(site),
url=url,
warc_type="resource",
content_type=mimetype,
payload=f,
extra_headers=extra_headers,
)
# consulted by _remember_videos()
ydl.pushed_videos.append(
{
"url": url,
"response_code": response.code,
"content-type": mimetype,
"content-length": size,
}
)
try:
request, response = worker._warcprox_write_record(
warcprox_address=worker._proxy_for(site),
url=url,
warc_type="resource",
content_type=mimetype,
payload=f,
extra_headers=extra_headers,
)
# consulted by _remember_videos()
ydl.pushed_videos.append(
{
"url": url,
"response_code": response.code,
"content-type": mimetype,
"content-length": size,
}
)
except:
traceback.print_exc()
def maybe_heartbeat_site_last_claimed(*args, **kwargs):
# in case yt-dlp takes a long time, heartbeat site.last_claimed