auto SumOfSquareDifferences(const jxl::Image3F& forig, const jxl::Image3F& smooth, ThreadPool* pool) -> jxl::ImageF