21 #include "./vpx_config.h"
22 #include "./y4minput.h"
23 #include "../vpx_ports/vpx_timer.h"
26 #include "vpx_ports/bitops.h"
28 #include "../tools_common.h"
29 #include "../video_writer.h"
// zero(Dest): fills the object Dest with zero bytes. The length comes from
// sizeof(Dest), so Dest must name the object itself (for example an array
// member), not a pointer to it.
33 #define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
// File-local string pointer. Nothing in the visible part of this file assigns
// or reads it. NOTE(review): presumably holds the program name -- confirm.
35 static const char *exec_name;
// Ends the process with a failure status. It prints nothing itself.
void usage_exit(void) {
  exit(EXIT_FAILURE);
}
// Enumeration of denoiser settings with the Vp8 prefix. Only the opening line
// and the last two enumerators are visible in this listing; the earlier
// enumerators and the closing brace are on lines that are missing here.
40 enum denoiserStateVp8 {
44 kVp8DenoiserOnYUVAggressive,
45 kVp8DenoiserOnAdaptive
// Enumeration of denoiser settings with the Vp9 prefix. Only the opening line
// and one enumerator are visible in this listing; the other enumerators and
// the closing brace are on lines that are missing here.
49 enum denoiserStateVp9 {
53 kVp9DenoiserOnYTwoSpatialLayers
56 static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };
// Statistics gathered while encoding and reported at the end of the run.
// Only the opening line and two members are visible in this listing. The
// other members that code in this file accesses (the layer_* arrays,
// window_size, window_count) and the closing brace are on missing lines.
59 struct RateControlMetrics {
// Accumulates the per-window (short-time) bitrate during encoding; the
// summary routine divides it by window_count to obtain the mean.
78 double avg_st_encoding_bitrate;
// Accumulates the squared per-window bitrate during encoding; the summary
// routine converts it to a variance.
80 double variance_st_encoding_bitrate;
// Initializes the rate-control statistics in *rc before encoding starts.
// main() calls this as set_rate_control_metrics(&rc, &cfg); the second
// parameter and several statements of the body are on lines that are missing
// from this listing, so only the visible pieces are described below.
94 static void set_rate_control_metrics(
struct RateControlMetrics *rc,
// Tail of an assignment whose left-hand side is not visible:
// 1000 * (layer-0 target bitrate) / (layer-0 framerate).
// NOTE(review): presumably stored in rc->layer_pfb[0] -- confirm.
103 1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0];
104 for (i = 0; i < ts_number_layers; ++i) {
// Tail of an expression built from the bitrate and framerate differences
// between layer i and layer i - 1.
// NOTE(review): this reads index i - 1 while the loop starts at i == 0; any
// guard for i > 0 is on a line not visible here -- confirm.
109 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
110 (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
// Reset the per-layer counters and accumulators.
112 rc->layer_input_frames[i] = 0;
113 rc->layer_enc_frames[i] = 0;
114 rc->layer_tot_enc_frames[i] = 0;
115 rc->layer_encoding_bitrate[i] = 0.0;
116 rc->layer_avg_frame_size[i] = 0.0;
117 rc->layer_avg_rate_mismatch[i] = 0.0;
// Short-time statistics: no windows measured yet, window length of 15 frames.
119 rc->window_count = 0;
120 rc->window_size = 15;
121 rc->avg_st_encoding_bitrate = 0.0;
122 rc->variance_st_encoding_bitrate = 0.0;
// Prints the per-layer and short-time rate-control statistics held in *rc.
// Several accumulators in *rc are normalized in place on the way (sums become
// averages or rates), so *rc is modified by this call.
// main() calls this as printout_rate_control_summary(&rc, &cfg, frame_cnt);
// the rest of the parameter list and some body lines are missing from this
// listing.
128 static void printout_rate_control_summary(
struct RateControlMetrics *rc,
132 int tot_num_frames = 0;
133 double perc_fluctuation = 0.0;
134 printf(
"Total number of processed frames: %d\n\n", frame_cnt - 1);
135 printf(
"Rate control layer stats for %d layer(s):\n\n",
// Frames that were input to layer i but not counted as encoded. For layer 0
// one more frame is subtracted.
// NOTE(review): presumably that extra frame is the key frame, which the
// "encoded (non-key) frames" counter excludes -- confirm.
138 const int num_dropped =
139 (i > 0) ? (rc->layer_input_frames[i] - rc->layer_enc_frames[i])
140 : (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1);
141 tot_num_frames += rc->layer_input_frames[i];
// Scale the accumulated bits by 0.001 * layer framerate; the divisor of this
// expression is on a line not visible here.
142 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] *
143 rc->layer_encoding_bitrate[i] /
// Turn the running sums into per-encoded-frame averages (the mismatch is also
// scaled by 100 to give a percentage).
// NOTE(review): both divide by layer_enc_frames[i]; no zero check is visible.
145 rc->layer_avg_frame_size[i] =
146 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[i];
147 rc->layer_avg_rate_mismatch[i] =
148 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[i];
149 printf(
"For layer#: %d \n", i);
150 printf(
"Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
151 rc->layer_encoding_bitrate[i]);
152 printf(
"Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
153 rc->layer_avg_frame_size[i]);
154 printf(
"Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]);
156 "Number of input frames, encoded (non-key) frames, "
157 "and perc dropped frames: %d %d %f \n",
158 rc->layer_input_frames[i], rc->layer_enc_frames[i],
159 100.0 * num_dropped / rc->layer_input_frames[i]);
// Short-time statistics: mean of the per-window bitrates, then the variance
// as (mean of squares) - (mean squared), then the relative fluctuation in
// percent.
// NOTE(review): window_count stays 0 when too few frames were encoded to
// complete a window; no zero check is visible before these divisions.
162 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
163 rc->variance_st_encoding_bitrate =
164 rc->variance_st_encoding_bitrate / rc->window_count -
165 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
166 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
167 rc->avg_st_encoding_bitrate;
168 printf(
"Short-time stats, for window of %d frames: \n", rc->window_size);
169 printf(
"Average, rms-variance, and percent-fluct: %f %f %f \n",
170 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
// Consistency check: the per-layer input counts must add up to frame_cnt - 1.
172 if ((frame_cnt - 1) != tot_num_frames)
173 die(
"Error: Number of input frames not equal to output! \n");
// Body fragment of a function whose header is not visible in this listing.
// NOTE(review): presumably set_roi_map(), which main() calls with
// (encoder->name, &cfg, &roi) -- confirm. The lines between the dimension
// setup and the loops, and the loop bodies, are missing here.
//
// The codec is recognized from the first three characters of enc_name.
181 uint8_t is_vp8 = strncmp(enc_name,
"vp8", 3) == 0 ? 1 : 0;
182 uint8_t is_vp9 = strncmp(enc_name,
"vp9", 3) == 0 ? 1 : 0;
183 if (!is_vp8 && !is_vp9) {
184 die(
"unsupported codec.");
// Block size is 8 for vp9 and 16 otherwise (i.e. for vp8).
188 block_size = is_vp9 && !is_vp8 ? 8 : 16;
// Map dimensions in blocks, rounded up so partial blocks at the right and
// bottom edges are included.
192 roi->
rows = (cfg->
g_h + block_size - 1) / block_size;
193 roi->
cols = (cfg->
g_w + block_size - 1) / block_size;
// Visit every block. The condition picks the blocks strictly between 1/4 and
// 3/4 of the rows and of the columns, i.e. the central region; what is done
// with those blocks is on lines not visible here.
234 for (i = 0; i < roi->
rows; ++i) {
235 for (j = 0; j < roi->
cols; ++j) {
236 if (i > (roi->
rows >> 2) && i < ((roi->
rows * 3) >> 2) &&
237 j > (roi->
cols >> 2) && j < ((roi->
cols * 3) >> 2)) {
// Tail of the parameter list, and part of the body, of a function whose first
// line is not visible in this listing.
// NOTE(review): presumably set_roi_skip_map(), which main() calls with
// (&cfg, &roi, mask_map, prev_mask_map, frame_cnt) -- confirm.
//
// skip_map:      per-block mask for the current frame (one int per block).
// prev_mask_map: per-block mask carried over between calls; updated here.
// frame_num:     index of the current frame.
245 int *skip_map,
int *prev_mask_map,
int frame_num) {
246 const int block_size = 8;
// Map dimensions in 8x8 blocks, rounded up to cover partial edge blocks.
248 roi->
rows = (cfg->
g_h + block_size - 1) / block_size;
249 roi->
cols = (cfg->
g_w + block_size - 1) / block_size;
259 for (i = 0; i < roi->
rows; ++i) {
260 for (j = 0; j < roi->
cols; ++j) {
// Row-major index of block (i, j).
261 const int idx = i * roi->
cols + j;
// A block set to 1 in both the current and the carried-over mask gets
// roi_map value 3. NOTE(review): what value 3 selects is configured on lines
// not visible here.
266 if (skip_map[idx] == 1 && prev_mask_map[idx] == 1) roi->
roi_map[idx] = 3;
// Every 10th frame the carried-over mask is replaced by the current one. On
// the other frames an entry can only be cleared (when the current mask is 0),
// never set.
268 if (frame_num % 10 == 0)
269 prev_mask_map[idx] = skip_map[idx];
270 else if (prev_mask_map[idx] == 1 && skip_map[idx] == 0)
271 prev_mask_map[idx] = 0;
// Sets up the temporal-layer pattern for the given layering_mode.
// main() calls this with (layering_mode, &cfg, layer_flags, ...); the middle
// of the parameter list, the case labels and most of each case body are on
// lines that are missing from this listing.
//
// What is visible in each case:
//   - *flag_periodicity is set to the length of the repeating flag pattern;
//     main() selects the per-frame flags as
//     layer_flags[frame_cnt % flag_periodicity].
//   - a local ids[] array is declared. NOTE(review): presumably the layer id
//     of each frame position in the pattern; where it is consumed is not
//     visible -- confirm.
//   - some layer_flags[] entries are assigned; where an assignment ends in
//     '=', its right-hand side is on a missing line.
282 static void set_temporal_layer_pattern(
int layering_mode,
285 int *flag_periodicity) {
286 switch (layering_mode) {
291 *flag_periodicity = 1;
302 int ids[2] = { 0, 1 };
304 *flag_periodicity = 2;
328 int ids[3] = { 0, 1, 1 };
330 *flag_periodicity = 3;
339 layer_flags[1] = layer_flags[2] =
346 int ids[6] = { 0, 2, 2, 1, 2, 2 };
348 *flag_periodicity = 6;
360 layer_flags[1] = layer_flags[2] = layer_flags[4] = layer_flags[5] =
366 int ids[4] = { 0, 2, 1, 2 };
368 *flag_periodicity = 4;
380 layer_flags[1] = layer_flags[3] =
387 int ids[4] = { 0, 2, 1, 2 };
389 *flag_periodicity = 4;
402 layer_flags[1] = layer_flags[3] =
409 int ids[4] = { 0, 2, 1, 2 };
411 *flag_periodicity = 4;
423 layer_flags[1] = layer_flags[3] =
430 int ids[16] = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4 };
432 *flag_periodicity = 16;
441 layer_flags[1] = layer_flags[3] = layer_flags[5] = layer_flags[7] =
442 layer_flags[9] = layer_flags[11] = layer_flags[13] = layer_flags[15] =
445 layer_flags[2] = layer_flags[6] = layer_flags[10] = layer_flags[14] =
447 layer_flags[4] = layer_flags[12] =
// In this case the flag pattern (8 entries) is longer than ids[] (2 entries).
454 int ids[2] = { 0, 1 };
456 *flag_periodicity = 8;
// Later pattern positions reuse the flags already set for earlier positions.
478 layer_flags[4] = layer_flags[2];
480 layer_flags[5] = layer_flags[3];
482 layer_flags[6] = layer_flags[4];
484 layer_flags[7] = layer_flags[5];
489 int ids[4] = { 0, 2, 1, 2 };
491 *flag_periodicity = 8;
505 layer_flags[3] = layer_flags[5] =
520 int ids[4] = { 0, 2, 1, 2 };
522 *flag_periodicity = 8;
546 layer_flags[5] = layer_flags[3];
550 layer_flags[7] = layer_flags[3];
559 int ids[4] = { 0, 2, 1, 2 };
561 *flag_periodicity = 4;
582 int ids[4] = { 0, 2, 1, 2 };
584 *flag_periodicity = 8;
// Positions 4 and 6 repeat positions 0 and 2; all remaining odd positions
// repeat position 1.
594 layer_flags[4] = layer_flags[0];
597 layer_flags[6] = layer_flags[2];
601 layer_flags[3] = layer_flags[1];
602 layer_flags[5] = layer_flags[1];
603 layer_flags[7] = layer_flags[1];
// Reads a mask from an open text stream into seg_map.
//
// The stream starts with two integers, the column count followed by the row
// count, and then holds rows * cols integer cells in row-major order. Every
// cell is stored inverted (1 - value): a 0 in the file becomes 1 in seg_map
// and a 1 becomes 0.
//
// seg_map must have room for rows * cols ints; this function has no way to
// check that. If the header cannot be parsed, or either dimension is not
// positive, seg_map is left untouched. If a cell cannot be parsed, reading
// stops at that cell and the remaining entries keep their previous values.
static void read_mask(FILE *mask_file, int *seg_map) {
  int mask_rows = 0;
  int mask_cols = 0;
  int i, j;

  if (mask_file == NULL || seg_map == NULL) return;

  // The loop bounds come from the file: never run the loops on values that
  // fscanf did not actually assign.
  if (fscanf(mask_file, "%d %d\n", &mask_cols, &mask_rows) != 2) return;
  if (mask_rows <= 0 || mask_cols <= 0) return;

  for (i = 0; i < mask_rows; i++) {
    for (j = 0; j < mask_cols; j++) {
      int cell;
      // Stop on a short or malformed file instead of inverting stale data.
      if (fscanf(mask_file, "%d ", &cell) != 1) return;
      // Reverse the bit.
      seg_map[j] = 1 - cell;
    }
    // Advance to the start of the next row.
    seg_map += mask_cols;
  }
}
// Program entry point. From the statements visible in this listing it:
//   1. checks the argument count and parses the arguments
//      (argv[1] input file, argv[2] output name prefix, argv[3] codec name,
//      argv[4]/argv[5] width/height, argv[8] speed, argv[10] error resilient,
//      argv[11] threads, argv[12] layering mode, argv[13..] one target
//      bitrate per layer, and in high-bitdepth builds a trailing bit depth);
//   2. opens the input, sets the layer pattern and the rate-control stats;
//   3. opens "<argv[2]>_<i>.ivf" output files and initializes the encoder;
//   4. encodes frame by frame while accumulating per-layer and short-time
//      bitrate statistics;
//   5. prints the statistics, closes the files and returns EXIT_SUCCESS.
// Many lines of the function (including several declarations and closing
// braces) are missing from this listing; the comments below describe only
// what the visible lines establish.
626 int main(
int argc,
char **argv) {
635 uint32_t error_resilient = 0;
642 int frame_duration = 1;
643 int layering_mode = 0;
645 int flag_periodicity = 1;
650 const VpxInterface *encoder = NULL;
651 struct VpxInputContext input_ctx;
652 struct RateControlMetrics rc;
// Index of the first per-layer bitrate argument in argv.
654 const int min_args_base = 13;
655 #if CONFIG_VP9_HIGHBITDEPTH
657 int input_bit_depth = 8;
// This variant of min_args accounts for the extra trailing <bit-depth>
// argument shown in the high-bitdepth usage text.
658 const int min_args = min_args_base + 1;
660 const int min_args = min_args_base;
661 #endif // CONFIG_VP9_HIGHBITDEPTH
// Accumulators for two short-time bitrate windows; the second one starts
// half a window later than the first (see the encode loop).
662 double sum_bitrate = 0.0;
663 double sum_bitrate2 = 0.0;
664 double framerate = 30.0;
666 FILE *mask_file = NULL;
673 zero(rc.layer_target_bitrate);
675 memset(&input_ctx, 0,
sizeof(input_ctx));
// Input defaults: 30/1 frames per second, I420 only.
677 input_ctx.framerate.numerator = 30;
678 input_ctx.framerate.denominator = 1;
679 input_ctx.only_i420 = 1;
680 input_ctx.bit_depth = 0;
// Too few arguments: report the usage text that matches this build.
684 if (argc < min_args) {
685 #if CONFIG_VP9_HIGHBITDEPTH
686 die(
"Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
687 "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
688 "<error_resilient> <threads> <mode> "
689 "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n",
692 die(
"Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
693 "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
694 "<error_resilient> <threads> <mode> "
695 "<Rate_0> ... <Rate_nlayers-1> \n",
697 #endif // CONFIG_VP9_HIGHBITDEPTH
700 encoder = get_vpx_encoder_by_name(argv[3]);
701 if (!encoder) die(
"Unsupported codec.");
// Width and height must each be at least 16 and even.
705 width = (
unsigned int)strtoul(argv[4], NULL, 0);
706 height = (
unsigned int)strtoul(argv[5], NULL, 0);
707 if (width < 16 || width % 2 || height < 16 || height % 2) {
708 die(
"Invalid resolution: %d x %d", width, height);
711 layering_mode = (int)strtol(argv[12], NULL, 0);
// mode_to_num_layers has 13 entries (modes 0..12) and is indexed with
// layering_mode right below, so 12 is the largest value that may pass.
// Accepting 13 here would read one element past the end of that table.
712 if (layering_mode < 0 || layering_mode > 12) {
713 die(
"Invalid layering mode (0..12) %s", argv[12]);
// The argument count must match the number of layers of the chosen mode.
// NOTE(review): the two checks below are presumably alternatives selected by
// a preprocessor conditional on lines not visible here -- confirm.
717 if (argc != min_args + mode_to_num_layers[layering_mode] + 1) {
718 die(
"Invalid number of arguments");
721 if (argc != min_args + mode_to_num_layers[layering_mode]) {
722 die(
"Invalid number of arguments");
726 input_ctx.filename = argv[1];
727 open_input_file(&input_ctx);
729 #if CONFIG_VP9_HIGHBITDEPTH
// The last argument selects the input bit depth.
730 switch (strtol(argv[argc - 1], NULL, 0)) {
737 input_bit_depth = 10;
741 input_bit_depth = 12;
743 default: die(
"Invalid bit depth (8, 10, 12) %s", argv[argc - 1]);
// For inputs that are not Y4M an image buffer is allocated here (the
// allocation call itself is on lines not visible in this listing).
747 if (input_ctx.file_type != FILE_TYPE_Y4M) {
751 width, height, 32)) {
752 die(
"Failed to allocate image (%dx%d)", width, height);
757 if (input_ctx.file_type != FILE_TYPE_Y4M) {
759 die(
"Failed to allocate image (%dx%d)", width, height);
762 #endif // CONFIG_VP9_HIGHBITDEPTH
775 #if CONFIG_VP9_HIGHBITDEPTH
781 #endif // CONFIG_VP9_HIGHBITDEPTH
787 speed = (int)strtol(argv[8], NULL, 0);
789 die(
"Invalid speed setting: must be positive");
791 if (strncmp(encoder->name,
"vp9", 3) == 0 && speed > 9) {
792 warn(
"Mapping speed %d to speed 9.\n", speed);
// Read one target bitrate per layer from argv[min_args_base] onwards.
795 for (i = min_args_base;
796 (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
// Index relative to the first bitrate argument, using the same constant as
// the loop bounds rather than a repeated literal.
797 rc.layer_target_bitrate[i - min_args_base] = (int)strtol(argv[i], NULL, 0);
798 if (strncmp(encoder->name,
"vp8", 3) == 0)
800 else if (strncmp(encoder->name,
"vp9", 3) == 0)
820 cfg.
g_threads = (
unsigned int)strtoul(argv[11], NULL, 0);
822 error_resilient = (uint32_t)strtoul(argv[10], NULL, 0);
823 if (error_resilient != 0 && error_resilient != 1) {
824 die(
"Invalid value for error resilient (0, 1): %d.", error_resilient);
836 set_temporal_layer_pattern(layering_mode, &cfg, layer_flags,
839 set_rate_control_metrics(&rc, &cfg);
// A Y4M input carries its own dimensions and frame rate; they must agree
// with the values given on the command line.
841 if (input_ctx.file_type == FILE_TYPE_Y4M) {
842 if (input_ctx.width != cfg.
g_w || input_ctx.height != cfg.
g_h) {
843 die(
"Incorrect width or height: %d x %d", cfg.
g_w, cfg.
g_h);
847 die(
"Incorrect framerate: numerator %d denominator %d",
// Open the output file(s), named "<argv[2]>_<i>.ivf".
855 char file_name[PATH_MAX];
857 info.codec_fourcc = encoder->fourcc;
858 info.frame_width = cfg.
g_w;
859 info.frame_height = cfg.
g_h;
863 snprintf(file_name,
sizeof(file_name),
"%s_%d.ivf", argv[2], i);
864 outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info);
865 if (!outfile[i]) die(
"Failed to open %s for writing", file_name);
867 assert(outfile[i] != NULL);
873 #if CONFIG_VP9_HIGHBITDEPTH
875 &codec, encoder->codec_interface(), &cfg,
879 #endif // CONFIG_VP9_HIGHBITDEPTH
880 die(
"Failed to initialize encoder");
// Mask buffers sized in blocks (rounded up), zero-initialized by calloc.
// NOTE(review): no check of the calloc results is visible in this listing.
883 mask_rows = (cfg.
g_h + block_size - 1) / block_size;
884 mask_cols = (cfg.
g_w + block_size - 1) / block_size;
885 mask_map = (
int *)calloc(mask_rows * mask_cols,
sizeof(*mask_map));
886 prev_mask_map = (
int *)calloc(mask_rows * mask_cols,
sizeof(*mask_map));
// Codec-specific setup, selected by the first three characters of the
// encoder name.
889 if (strncmp(encoder->name,
"vp8", 3) == 0) {
895 set_roi_map(encoder->name, &cfg, &roi);
897 die_codec(&codec,
"Failed to set ROI map");
899 }
else if (strncmp(encoder->name,
"vp9", 3) == 0) {
901 memset(&svc_params, 0,
sizeof(svc_params));
920 die_codec(&codec,
"Failed to set SVC");
929 if (strncmp(encoder->name,
"vp8", 3) == 0) {
937 const int max_intra_size_pct = 1000;
// Encode loop: runs while input frames remain or the encoder still returns
// data.
943 while (frame_avail || got_data) {
944 struct vpx_usec_timer timer;
948 char mask_file_name[255];
955 if (strncmp(encoder->name,
"vp9", 3) == 0) {
957 }
else if (strncmp(encoder->name,
"vp8", 3) == 0) {
// Per-frame flags follow the repeating pattern; mode 0 uses no flags.
961 flags = layer_flags[frame_cnt % flag_periodicity];
962 if (layering_mode == 0) flags = 0;
// Optional per-frame mask file "<last argument><5-digit frame number>.txt".
// A frame without such a file is simply encoded without a new mask.
964 snprintf(mask_file_name,
sizeof(mask_file_name),
"%s%05d.txt",
965 argv[argc - 1], frame_cnt);
966 mask_file = fopen(mask_file_name,
"r");
967 if (mask_file != NULL) {
968 read_mask(mask_file, mask_map);
971 set_roi_skip_map(&cfg, &roi, mask_map, prev_mask_map, frame_cnt);
973 die_codec(&codec,
"Failed to set ROI map");
976 frame_avail = read_frame(&input_ctx, &raw);
// Only the encode call (on lines not visible here) is timed.
978 vpx_usec_timer_start(&timer);
981 die_codec(&codec,
"Failed to encode frame");
983 vpx_usec_timer_mark(&timer);
984 cx_time += vpx_usec_timer_elapsed(&timer);
986 if (layering_mode != 7) {
// Per-packet bookkeeping: write the frame and add its size in bits
// (8 * bytes) to the per-layer accumulators.
996 vpx_video_writer_write_frame(outfile[i], pkt->
data.
frame.buf,
998 ++rc.layer_tot_enc_frames[i];
999 rc.layer_encoding_bitrate[i] += 8.0 * pkt->
data.
frame.sz;
1003 rc.layer_avg_frame_size[i] += 8.0 * pkt->
data.
frame.sz;
1004 rc.layer_avg_rate_mismatch[i] +=
1005 fabs(8.0 * pkt->
data.
frame.sz - rc.layer_pfb[i]) /
1007 ++rc.layer_enc_frames[i];
// Short-time bitrate, first window: starts after the first window_size
// frames and is sampled every window_size frames.
1013 if (rc.window_size == 0) rc.window_size = 15;
1014 if (frame_cnt > rc.window_size) {
1015 sum_bitrate += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
1016 if (frame_cnt % rc.window_size == 0) {
1017 rc.window_count += 1;
1018 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1019 rc.variance_st_encoding_bitrate +=
1020 (sum_bitrate / rc.window_size) *
1021 (sum_bitrate / rc.window_size);
// Second window: starts accumulating half a window later than the first.
1026 if (frame_cnt > rc.window_size + rc.window_size / 2) {
1027 sum_bitrate2 += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
1028 if (frame_cnt > 2 * rc.window_size &&
1029 frame_cnt % rc.window_size == 0) {
1030 rc.window_count += 1;
1031 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
1032 rc.variance_st_encoding_bitrate +=
1033 (sum_bitrate2 / rc.window_size) *
1034 (sum_bitrate2 / rc.window_size);
1043 pts += frame_duration;
1047 free(prev_mask_map);
1049 close_input_file(&input_ctx);
1050 printout_rate_control_summary(&rc, &cfg, frame_cnt);
// Encoding time per frame in milliseconds and frames per second (cx_time is
// in microseconds). The divisor is computed in double: frame_cnt is printed
// with %d, and frame_cnt * 1000000 in integer arithmetic overflows once
// frame_cnt exceeds about 2147.
1052 printf(
"Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
1053 frame_cnt, 1000 * (
float)cx_time / ((
double)frame_cnt * 1000000),
1054 1000000 * (
double)frame_cnt / (
double)cx_time);
1059 for (i = 0; i < cfg.
ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);
1061 if (input_ctx.file_type != FILE_TYPE_Y4M) {
1068 return EXIT_SUCCESS;