auto NumNonZero8x8ExceptDC(const int32_t* block, int32_t* nzeros_pos) -> int32_t