WebM Codec SDK
vp9_spatial_svc_encoder
1 /*
2  * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This is an example demonstrating how to implement a multi-layer
13  * VP9 encoding scheme based on spatial scalability for video applications
14  * that benefit from a scalable bitstream.
15  */
16 
17 #include <math.h>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 
23 #include "../args.h"
24 #include "../tools_common.h"
25 #include "../video_writer.h"
26 
27 #include "../vpx_ports/vpx_timer.h"
28 #include "./svc_context.h"
29 #include "vpx/vp8cx.h"
30 #include "vpx/vpx_encoder.h"
31 #include "../vpxstats.h"
32 #include "vp9/encoder/vp9_encoder.h"
33 #include "./y4minput.h"
34 
35 #define OUTPUT_RC_STATS 1
36 
37 #define SIMULCAST_MODE 0
38 
39 static const arg_def_t outputfile =
40  ARG_DEF("o", "output", 1, "Output filename");
41 static const arg_def_t skip_frames_arg =
42  ARG_DEF("s", "skip-frames", 1, "input frames to skip");
43 static const arg_def_t frames_arg =
44  ARG_DEF("f", "frames", 1, "number of frames to encode");
45 static const arg_def_t threads_arg =
46  ARG_DEF("th", "threads", 1, "number of threads to use");
47 #if OUTPUT_RC_STATS
48 static const arg_def_t output_rc_stats_arg =
49  ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
50 #endif
51 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
52 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
53 static const arg_def_t timebase_arg =
54  ARG_DEF("t", "timebase", 1, "timebase (num/den)");
55 static const arg_def_t bitrate_arg = ARG_DEF(
56  "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
57 static const arg_def_t spatial_layers_arg =
58  ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
59 static const arg_def_t temporal_layers_arg =
60  ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
61 static const arg_def_t temporal_layering_mode_arg =
62  ARG_DEF("tlm", "temporal-layering-mode", 1,
63  "temporal layering scheme."
64  "VP9E_TEMPORAL_LAYERING_MODE");
65 static const arg_def_t kf_dist_arg =
66  ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
67 static const arg_def_t scale_factors_arg =
68  ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
69 static const arg_def_t min_q_arg =
70  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
71 static const arg_def_t max_q_arg =
72  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
73 static const arg_def_t min_bitrate_arg =
74  ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
75 static const arg_def_t max_bitrate_arg =
76  ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
77 static const arg_def_t lag_in_frame_arg =
78  ARG_DEF(NULL, "lag-in-frames", 1,
79  "Number of frame to input before "
80  "generating any outputs");
81 static const arg_def_t rc_end_usage_arg =
82  ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
83 static const arg_def_t speed_arg =
84  ARG_DEF("sp", "speed", 1, "speed configuration");
85 static const arg_def_t aqmode_arg =
86  ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
87 static const arg_def_t bitrates_arg =
88  ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");
89 static const arg_def_t dropframe_thresh_arg =
90  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
91 static const struct arg_enum_list tune_content_enum[] = {
92  { "default", VP9E_CONTENT_DEFAULT },
93  { "screen", VP9E_CONTENT_SCREEN },
94  { "film", VP9E_CONTENT_FILM },
95  { NULL, 0 }
96 };
97 
98 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
99  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
100 static const arg_def_t inter_layer_pred_arg = ARG_DEF(
101  NULL, "inter-layer-pred", 1, "0 - 3: On, Off, Key-frames, Constrained");
102 
103 #if CONFIG_VP9_HIGHBITDEPTH
104 static const struct arg_enum_list bitdepth_enum[] = {
105  { "8", VPX_BITS_8 }, { "10", VPX_BITS_10 }, { "12", VPX_BITS_12 }, { NULL, 0 }
106 };
107 
108 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
109  "d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
110 #endif // CONFIG_VP9_HIGHBITDEPTH
111 
112 static const arg_def_t *svc_args[] = { &frames_arg,
113  &outputfile,
114  &width_arg,
115  &height_arg,
116  &timebase_arg,
117  &bitrate_arg,
118  &skip_frames_arg,
119  &spatial_layers_arg,
120  &kf_dist_arg,
121  &scale_factors_arg,
122  &min_q_arg,
123  &max_q_arg,
124  &min_bitrate_arg,
125  &max_bitrate_arg,
126  &temporal_layers_arg,
127  &temporal_layering_mode_arg,
128  &lag_in_frame_arg,
129  &threads_arg,
130  &aqmode_arg,
131 #if OUTPUT_RC_STATS
132  &output_rc_stats_arg,
133 #endif
134 
135 #if CONFIG_VP9_HIGHBITDEPTH
136  &bitdepth_arg,
137 #endif
138  &speed_arg,
139  &rc_end_usage_arg,
140  &bitrates_arg,
141  &dropframe_thresh_arg,
142  &tune_content_arg,
143  &inter_layer_pred_arg,
144  NULL };
145 
// Application defaults applied by parse_command_line() before any option
// on the command line is examined.

// Frame selection.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;

// Source geometry and timebase (num / den).
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;

// Rate, layering and keyframe spacing.
static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
static const uint32_t default_temporal_layering_mode = 0;
static const uint32_t default_output_rc_stats = 0;

// Values that defer to the library.
static const int32_t default_speed = -1;    // -1 means use library default.
static const uint32_t default_threads = 0;  // zero means use library default.
160 
161 typedef struct {
162  const char *output_filename;
163  uint32_t frames_to_code;
164  uint32_t frames_to_skip;
165  struct VpxInputContext input_ctx;
166  stats_io_t rc_stats;
167  int tune_content;
168  int inter_layer_pred;
169 } AppInput;
170 
171 static const char *exec_name;
172 
173 void usage_exit(void) {
174  fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
175  exec_name);
176  fprintf(stderr, "Options:\n");
177  arg_show_usage(stderr, svc_args);
178  exit(EXIT_FAILURE);
179 }
180 
181 static void parse_command_line(int argc, const char **argv_,
182  AppInput *app_input, SvcContext *svc_ctx,
183  vpx_codec_enc_cfg_t *enc_cfg) {
184  struct arg arg;
185  char **argv = NULL;
186  char **argi = NULL;
187  char **argj = NULL;
188  vpx_codec_err_t res;
189  unsigned int min_bitrate = 0;
190  unsigned int max_bitrate = 0;
191  char string_options[1024] = { 0 };
192 
193  // initialize SvcContext with parameters that will be passed to vpx_svc_init
194  svc_ctx->log_level = SVC_LOG_DEBUG;
195  svc_ctx->spatial_layers = default_spatial_layers;
196  svc_ctx->temporal_layers = default_temporal_layers;
197  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
198 #if OUTPUT_RC_STATS
199  svc_ctx->output_rc_stat = default_output_rc_stats;
200 #endif
201  svc_ctx->speed = default_speed;
202  svc_ctx->threads = default_threads;
203 
204  // start with default encoder configuration
205  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
206  if (res) {
207  die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
208  }
209  // update enc_cfg with app default values
210  enc_cfg->g_w = default_width;
211  enc_cfg->g_h = default_height;
212  enc_cfg->g_timebase.num = default_timebase_num;
213  enc_cfg->g_timebase.den = default_timebase_den;
214  enc_cfg->rc_target_bitrate = default_bitrate;
215  enc_cfg->kf_min_dist = default_kf_dist;
216  enc_cfg->kf_max_dist = default_kf_dist;
217  enc_cfg->rc_end_usage = VPX_CQ;
218 
219  // initialize AppInput with default values
220  app_input->frames_to_code = default_frames_to_code;
221  app_input->frames_to_skip = default_frames_to_skip;
222 
223  // process command line options
224  argv = argv_dup(argc - 1, argv_ + 1);
225  if (!argv) {
226  fprintf(stderr, "Error allocating argument list\n");
227  exit(EXIT_FAILURE);
228  }
229  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
230  arg.argv_step = 1;
231 
232  if (arg_match(&arg, &frames_arg, argi)) {
233  app_input->frames_to_code = arg_parse_uint(&arg);
234  } else if (arg_match(&arg, &outputfile, argi)) {
235  app_input->output_filename = arg.val;
236  } else if (arg_match(&arg, &width_arg, argi)) {
237  enc_cfg->g_w = arg_parse_uint(&arg);
238  } else if (arg_match(&arg, &height_arg, argi)) {
239  enc_cfg->g_h = arg_parse_uint(&arg);
240  } else if (arg_match(&arg, &timebase_arg, argi)) {
241  enc_cfg->g_timebase = arg_parse_rational(&arg);
242  } else if (arg_match(&arg, &bitrate_arg, argi)) {
243  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
244  } else if (arg_match(&arg, &skip_frames_arg, argi)) {
245  app_input->frames_to_skip = arg_parse_uint(&arg);
246  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
247  svc_ctx->spatial_layers = arg_parse_uint(&arg);
248  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
249  svc_ctx->temporal_layers = arg_parse_uint(&arg);
250 #if OUTPUT_RC_STATS
251  } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
252  svc_ctx->output_rc_stat = arg_parse_uint(&arg);
253 #endif
254  } else if (arg_match(&arg, &speed_arg, argi)) {
255  svc_ctx->speed = arg_parse_uint(&arg);
256  if (svc_ctx->speed > 9) {
257  warn("Mapping speed %d to speed 9.\n", svc_ctx->speed);
258  }
259  } else if (arg_match(&arg, &aqmode_arg, argi)) {
260  svc_ctx->aqmode = arg_parse_uint(&arg);
261  } else if (arg_match(&arg, &threads_arg, argi)) {
262  svc_ctx->threads = arg_parse_uint(&arg);
263  } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
264  svc_ctx->temporal_layering_mode = enc_cfg->temporal_layering_mode =
265  arg_parse_int(&arg);
266  if (svc_ctx->temporal_layering_mode) {
267  enc_cfg->g_error_resilient = 1;
268  }
269  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
270  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
271  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
272  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
273  strncat(string_options, " scale-factors=",
274  sizeof(string_options) - strlen(string_options) - 1);
275  strncat(string_options, arg.val,
276  sizeof(string_options) - strlen(string_options) - 1);
277  } else if (arg_match(&arg, &bitrates_arg, argi)) {
278  strncat(string_options, " bitrates=",
279  sizeof(string_options) - strlen(string_options) - 1);
280  strncat(string_options, arg.val,
281  sizeof(string_options) - strlen(string_options) - 1);
282  } else if (arg_match(&arg, &min_q_arg, argi)) {
283  strncat(string_options, " min-quantizers=",
284  sizeof(string_options) - strlen(string_options) - 1);
285  strncat(string_options, arg.val,
286  sizeof(string_options) - strlen(string_options) - 1);
287  } else if (arg_match(&arg, &max_q_arg, argi)) {
288  strncat(string_options, " max-quantizers=",
289  sizeof(string_options) - strlen(string_options) - 1);
290  strncat(string_options, arg.val,
291  sizeof(string_options) - strlen(string_options) - 1);
292  } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
293  min_bitrate = arg_parse_uint(&arg);
294  } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
295  max_bitrate = arg_parse_uint(&arg);
296  } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
297  enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
298  } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
299  enc_cfg->rc_end_usage = arg_parse_uint(&arg);
300 #if CONFIG_VP9_HIGHBITDEPTH
301  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
302  enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
303  switch (enc_cfg->g_bit_depth) {
304  case VPX_BITS_8:
305  enc_cfg->g_input_bit_depth = 8;
306  enc_cfg->g_profile = 0;
307  break;
308  case VPX_BITS_10:
309  enc_cfg->g_input_bit_depth = 10;
310  enc_cfg->g_profile = 2;
311  break;
312  case VPX_BITS_12:
313  enc_cfg->g_input_bit_depth = 12;
314  enc_cfg->g_profile = 2;
315  break;
316  default:
317  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
318  break;
319  }
320 #endif // CONFIG_VP9_HIGHBITDEPTH
321  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
322  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
323  } else if (arg_match(&arg, &tune_content_arg, argi)) {
324  app_input->tune_content = arg_parse_uint(&arg);
325  } else if (arg_match(&arg, &inter_layer_pred_arg, argi)) {
326  app_input->inter_layer_pred = arg_parse_uint(&arg);
327  } else {
328  ++argj;
329  }
330  }
331 
332  // There will be a space in front of the string options
333  if (strlen(string_options) > 0)
334  vpx_svc_set_options(svc_ctx, string_options + 1);
335 
336  enc_cfg->g_pass = VPX_RC_ONE_PASS;
337 
338  if (enc_cfg->rc_target_bitrate > 0) {
339  if (min_bitrate > 0) {
340  enc_cfg->rc_2pass_vbr_minsection_pct =
341  min_bitrate * 100 / enc_cfg->rc_target_bitrate;
342  }
343  if (max_bitrate > 0) {
344  enc_cfg->rc_2pass_vbr_maxsection_pct =
345  max_bitrate * 100 / enc_cfg->rc_target_bitrate;
346  }
347  }
348 
349  // Check for unrecognized options
350  for (argi = argv; *argi; ++argi)
351  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
352  die("Error: Unrecognized option %s\n", *argi);
353 
354  if (argv[0] == NULL) {
355  usage_exit();
356  }
357  app_input->input_ctx.filename = argv[0];
358  free(argv);
359 
360  open_input_file(&app_input->input_ctx);
361  if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
362  enc_cfg->g_w = app_input->input_ctx.width;
363  enc_cfg->g_h = app_input->input_ctx.height;
364  enc_cfg->g_timebase.den = app_input->input_ctx.framerate.numerator;
365  enc_cfg->g_timebase.num = app_input->input_ctx.framerate.denominator;
366  }
367 
368  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
369  enc_cfg->g_h % 2)
370  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
371 
372  printf(
373  "Codec %s\nframes: %d, skip: %d\n"
374  "layers: %d\n"
375  "width %d, height: %d,\n"
376  "num: %d, den: %d, bitrate: %d,\n"
377  "gop size: %d\n",
378  vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
379  app_input->frames_to_skip, svc_ctx->spatial_layers, enc_cfg->g_w,
380  enc_cfg->g_h, enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
381  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
382 }
383 
384 #if OUTPUT_RC_STATS
385 // For rate control encoding stats.
386 struct RateControlStats {
387  // Number of input frames per layer.
388  int layer_input_frames[VPX_MAX_LAYERS];
389  // Total (cumulative) number of encoded frames per layer.
390  int layer_tot_enc_frames[VPX_MAX_LAYERS];
391  // Number of encoded non-key frames per layer.
392  int layer_enc_frames[VPX_MAX_LAYERS];
393  // Framerate per layer (cumulative).
394  double layer_framerate[VPX_MAX_LAYERS];
395  // Target average frame size per layer (per-frame-bandwidth per layer).
396  double layer_pfb[VPX_MAX_LAYERS];
397  // Actual average frame size per layer.
398  double layer_avg_frame_size[VPX_MAX_LAYERS];
399  // Average rate mismatch per layer (|target - actual| / target).
400  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
401  // Actual encoding bitrate per layer (cumulative).
402  double layer_encoding_bitrate[VPX_MAX_LAYERS];
403  // Average of the short-time encoder actual bitrate.
404  // TODO(marpan): Should we add these short-time stats for each layer?
405  double avg_st_encoding_bitrate;
406  // Variance of the short-time encoder actual bitrate.
407  double variance_st_encoding_bitrate;
408  // Window (number of frames) for computing short-time encoding bitrate.
409  int window_size;
410  // Number of window measurements.
411  int window_count;
412 };
413 
414 // Note: these rate control stats assume only 1 key frame in the
415 // sequence (i.e., first frame only).
416 static void set_rate_control_stats(struct RateControlStats *rc,
417  vpx_codec_enc_cfg_t *cfg) {
418  unsigned int sl, tl;
419  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
420  // per-frame-bandwidth, for the rate control encoding stats below.
421  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
422 
423  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
424  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
425  const int layer = sl * cfg->ts_number_layers + tl;
426  if (cfg->ts_number_layers == 1)
427  rc->layer_framerate[layer] = framerate;
428  else
429  rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
430  if (tl > 0) {
431  rc->layer_pfb[layer] =
432  1000.0 *
433  (cfg->layer_target_bitrate[layer] -
434  cfg->layer_target_bitrate[layer - 1]) /
435  (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
436  } else {
437  rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
438  rc->layer_framerate[layer];
439  }
440  rc->layer_input_frames[layer] = 0;
441  rc->layer_enc_frames[layer] = 0;
442  rc->layer_tot_enc_frames[layer] = 0;
443  rc->layer_encoding_bitrate[layer] = 0.0;
444  rc->layer_avg_frame_size[layer] = 0.0;
445  rc->layer_avg_rate_mismatch[layer] = 0.0;
446  }
447  }
448  rc->window_count = 0;
449  rc->window_size = 15;
450  rc->avg_st_encoding_bitrate = 0.0;
451  rc->variance_st_encoding_bitrate = 0.0;
452 }
453 
454 static void printout_rate_control_summary(struct RateControlStats *rc,
455  vpx_codec_enc_cfg_t *cfg,
456  int frame_cnt) {
457  unsigned int sl, tl;
458  double perc_fluctuation = 0.0;
459  int tot_num_frames = 0;
460  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
461  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
463  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
464  tot_num_frames = 0;
465  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
466  const int layer = sl * cfg->ts_number_layers + tl;
467  const int num_dropped =
468  (tl > 0)
469  ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
470  : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
471  1);
472  tot_num_frames += rc->layer_input_frames[layer];
473  rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
474  rc->layer_encoding_bitrate[layer] /
475  tot_num_frames;
476  rc->layer_avg_frame_size[layer] =
477  rc->layer_avg_frame_size[layer] / rc->layer_enc_frames[layer];
478  rc->layer_avg_rate_mismatch[layer] = 100.0 *
479  rc->layer_avg_rate_mismatch[layer] /
480  rc->layer_enc_frames[layer];
481  printf("For layer#: sl%d tl%d \n", sl, tl);
482  printf("Bitrate (target vs actual): %d %f.0 kbps\n",
483  cfg->layer_target_bitrate[layer],
484  rc->layer_encoding_bitrate[layer]);
485  printf("Average frame size (target vs actual): %f %f bits\n",
486  rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
487  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[layer]);
488  printf(
489  "Number of input frames, encoded (non-key) frames, "
490  "and percent dropped frames: %d %d %f.0 \n",
491  rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
492  100.0 * num_dropped / rc->layer_input_frames[layer]);
493  printf("\n");
494  }
495  }
496  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
497  rc->variance_st_encoding_bitrate =
498  rc->variance_st_encoding_bitrate / rc->window_count -
499  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
500  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
501  rc->avg_st_encoding_bitrate;
502  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
503  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
504  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
505  perc_fluctuation);
506  printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt,
507  tot_num_frames);
508 }
509 
510 static vpx_codec_err_t parse_superframe_index(const uint8_t *data,
511  size_t data_sz, uint64_t sizes[8],
512  int *count) {
513  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
514  // it is a super frame index. If the last byte of real video compression
515  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
516  // not the associated matching marker byte at the front of the index we have
517  // an invalid bitstream and need to return an error.
518 
519  uint8_t marker;
520 
521  marker = *(data + data_sz - 1);
522  *count = 0;
523 
524  if ((marker & 0xe0) == 0xc0) {
525  const uint32_t frames = (marker & 0x7) + 1;
526  const uint32_t mag = ((marker >> 3) & 0x3) + 1;
527  const size_t index_sz = 2 + mag * frames;
528 
529  // This chunk is marked as having a superframe index but doesn't have
530  // enough data for it, thus it's an invalid superframe index.
531  if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
532 
533  {
534  const uint8_t marker2 = *(data + data_sz - index_sz);
535 
536  // This chunk is marked as having a superframe index but doesn't have
537  // the matching marker byte at the front of the index therefore it's an
538  // invalid chunk.
539  if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
540  }
541 
542  {
543  // Found a valid superframe index.
544  uint32_t i, j;
545  const uint8_t *x = &data[data_sz - index_sz + 1];
546 
547  for (i = 0; i < frames; ++i) {
548  uint32_t this_sz = 0;
549 
550  for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
551  sizes[i] = this_sz;
552  }
553  *count = frames;
554  }
555  }
556  return VPX_CODEC_OK;
557 }
558 #endif
559 
560 // Example pattern for spatial layers and 2 temporal layers used in the
561 // bypass/flexible mode. The pattern corresponds to the pattern
562 // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
563 // non-flexible mode.
564 static void set_frame_flags_bypass_mode_ex0(
565  int tl, int num_spatial_layers, int is_key_frame,
566  vpx_svc_ref_frame_config_t *ref_frame_config) {
567  int sl;
568  for (sl = 0; sl < num_spatial_layers; ++sl)
569  ref_frame_config->update_buffer_slot[sl] = 0;
570 
571  for (sl = 0; sl < num_spatial_layers; ++sl) {
572  // Set the buffer idx.
573  if (tl == 0) {
574  ref_frame_config->lst_fb_idx[sl] = sl;
575  if (sl) {
576  if (is_key_frame) {
577  ref_frame_config->lst_fb_idx[sl] = sl - 1;
578  ref_frame_config->gld_fb_idx[sl] = sl;
579  } else {
580  ref_frame_config->gld_fb_idx[sl] = sl - 1;
581  }
582  } else {
583  ref_frame_config->gld_fb_idx[sl] = 0;
584  }
585  ref_frame_config->alt_fb_idx[sl] = 0;
586  } else if (tl == 1) {
587  ref_frame_config->lst_fb_idx[sl] = sl;
588  ref_frame_config->gld_fb_idx[sl] =
589  (sl == 0) ? 0 : num_spatial_layers + sl - 1;
590  ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
591  }
592  // Set the reference and update flags.
593  if (!tl) {
594  if (!sl) {
595  // Base spatial and base temporal (sl = 0, tl = 0)
596  ref_frame_config->reference_last[sl] = 1;
597  ref_frame_config->reference_golden[sl] = 0;
598  ref_frame_config->reference_alt_ref[sl] = 0;
599  ref_frame_config->update_buffer_slot[sl] |=
600  1 << ref_frame_config->lst_fb_idx[sl];
601  } else {
602  if (is_key_frame) {
603  ref_frame_config->reference_last[sl] = 1;
604  ref_frame_config->reference_golden[sl] = 0;
605  ref_frame_config->reference_alt_ref[sl] = 0;
606  ref_frame_config->update_buffer_slot[sl] |=
607  1 << ref_frame_config->gld_fb_idx[sl];
608  } else {
609  // Non-zero spatiall layer.
610  ref_frame_config->reference_last[sl] = 1;
611  ref_frame_config->reference_golden[sl] = 1;
612  ref_frame_config->reference_alt_ref[sl] = 1;
613  ref_frame_config->update_buffer_slot[sl] |=
614  1 << ref_frame_config->lst_fb_idx[sl];
615  }
616  }
617  } else if (tl == 1) {
618  if (!sl) {
619  // Base spatial and top temporal (tl = 1)
620  ref_frame_config->reference_last[sl] = 1;
621  ref_frame_config->reference_golden[sl] = 0;
622  ref_frame_config->reference_alt_ref[sl] = 0;
623  ref_frame_config->update_buffer_slot[sl] |=
624  1 << ref_frame_config->alt_fb_idx[sl];
625  } else {
626  // Non-zero spatial.
627  if (sl < num_spatial_layers - 1) {
628  ref_frame_config->reference_last[sl] = 1;
629  ref_frame_config->reference_golden[sl] = 1;
630  ref_frame_config->reference_alt_ref[sl] = 0;
631  ref_frame_config->update_buffer_slot[sl] |=
632  1 << ref_frame_config->alt_fb_idx[sl];
633  } else if (sl == num_spatial_layers - 1) {
634  // Top spatial and top temporal (non-reference -- doesn't update any
635  // reference buffers)
636  ref_frame_config->reference_last[sl] = 1;
637  ref_frame_config->reference_golden[sl] = 1;
638  ref_frame_config->reference_alt_ref[sl] = 0;
639  }
640  }
641  }
642  }
643 }
644 
645 // Example pattern for 2 spatial layers and 2 temporal layers used in the
646 // bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1.
647 static void set_frame_flags_bypass_mode_ex1(
648  int tl, int num_spatial_layers, int is_key_frame,
649  vpx_svc_ref_frame_config_t *ref_frame_config) {
650  int sl;
651  for (sl = 0; sl < num_spatial_layers; ++sl)
652  ref_frame_config->update_buffer_slot[sl] = 0;
653 
654  if (tl == 0) {
655  if (is_key_frame) {
656  ref_frame_config->lst_fb_idx[1] = 0;
657  ref_frame_config->gld_fb_idx[1] = 1;
658  } else {
659  ref_frame_config->lst_fb_idx[1] = 1;
660  ref_frame_config->gld_fb_idx[1] = 0;
661  }
662  ref_frame_config->alt_fb_idx[1] = 0;
663 
664  ref_frame_config->lst_fb_idx[0] = 0;
665  ref_frame_config->gld_fb_idx[0] = 0;
666  ref_frame_config->alt_fb_idx[0] = 0;
667  }
668  if (tl == 1) {
669  ref_frame_config->lst_fb_idx[0] = 0;
670  ref_frame_config->gld_fb_idx[0] = 1;
671  ref_frame_config->alt_fb_idx[0] = 2;
672 
673  ref_frame_config->lst_fb_idx[1] = 1;
674  ref_frame_config->gld_fb_idx[1] = 2;
675  ref_frame_config->alt_fb_idx[1] = 3;
676  }
677  // Set the reference and update flags.
678  if (tl == 0) {
679  // Base spatial and base temporal (sl = 0, tl = 0)
680  ref_frame_config->reference_last[0] = 1;
681  ref_frame_config->reference_golden[0] = 0;
682  ref_frame_config->reference_alt_ref[0] = 0;
683  ref_frame_config->update_buffer_slot[0] |=
684  1 << ref_frame_config->lst_fb_idx[0];
685 
686  if (is_key_frame) {
687  ref_frame_config->reference_last[1] = 1;
688  ref_frame_config->reference_golden[1] = 0;
689  ref_frame_config->reference_alt_ref[1] = 0;
690  ref_frame_config->update_buffer_slot[1] |=
691  1 << ref_frame_config->gld_fb_idx[1];
692  } else {
693  // Non-zero spatiall layer.
694  ref_frame_config->reference_last[1] = 1;
695  ref_frame_config->reference_golden[1] = 1;
696  ref_frame_config->reference_alt_ref[1] = 1;
697  ref_frame_config->update_buffer_slot[1] |=
698  1 << ref_frame_config->lst_fb_idx[1];
699  }
700  }
701  if (tl == 1) {
702  // Top spatial and top temporal (non-reference -- doesn't update any
703  // reference buffers)
704  ref_frame_config->reference_last[1] = 1;
705  ref_frame_config->reference_golden[1] = 0;
706  ref_frame_config->reference_alt_ref[1] = 0;
707  }
708 }
709 
710 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
711 static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
712  const int frames_out, int *mismatch_seen) {
713  vpx_image_t enc_img, dec_img;
714  struct vp9_ref_frame ref_enc, ref_dec;
715  if (*mismatch_seen) return;
716  /* Get the internal reference frame */
717  ref_enc.idx = 0;
718  ref_dec.idx = 0;
719  vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc);
720  enc_img = ref_enc.img;
721  vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec);
722  dec_img = ref_dec.img;
723 #if CONFIG_VP9_HIGHBITDEPTH
724  if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) !=
725  (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) {
726  if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
727  vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
728  enc_img.d_w, enc_img.d_h, 16);
729  vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img);
730  }
731  if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
732  vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
733  dec_img.d_w, dec_img.d_h, 16);
734  vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img);
735  }
736  }
737 #endif
738 
739  if (!compare_img(&enc_img, &dec_img)) {
740  int y[4], u[4], v[4];
741 #if CONFIG_VP9_HIGHBITDEPTH
742  if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
743  find_mismatch_high(&enc_img, &dec_img, y, u, v);
744  } else {
745  find_mismatch(&enc_img, &dec_img, y, u, v);
746  }
747 #else
748  find_mismatch(&enc_img, &dec_img, y, u, v);
749 #endif
750  decoder->err = 1;
751  printf(
752  "Encode/decode mismatch on frame %d at"
753  " Y[%d, %d] {%d/%d},"
754  " U[%d, %d] {%d/%d},"
755  " V[%d, %d] {%d/%d}\n",
756  frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1],
757  v[2], v[3]);
758  *mismatch_seen = frames_out;
759  }
760 
761  vpx_img_free(&enc_img);
762  vpx_img_free(&dec_img);
763 }
764 #endif
765 
766 #if OUTPUT_RC_STATS
767 static void svc_output_rc_stats(
768  vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg,
769  vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt,
770  struct RateControlStats *rc, VpxVideoWriter **outfile,
771  const uint32_t frame_cnt, const double framerate) {
772  int num_layers_encoded = 0;
773  unsigned int sl, tl;
774  uint64_t sizes[8];
775  uint64_t sizes_parsed[8];
776  int count = 0;
777  double sum_bitrate = 0.0;
778  double sum_bitrate2 = 0.0;
779  vp9_zero(sizes);
780  vp9_zero(sizes_parsed);
781  vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id);
782  parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz,
783  sizes_parsed, &count);
784  if (enc_cfg->ss_number_layers == 1) {
785  sizes[0] = cx_pkt->data.frame.sz;
786  } else {
787  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
788  sizes[sl] = 0;
789  if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
790  sizes[sl] = sizes_parsed[num_layers_encoded];
791  num_layers_encoded++;
792  }
793  }
794  }
795  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
796  unsigned int sl2;
797  uint64_t tot_size = 0;
798 #if SIMULCAST_MODE
799  for (sl2 = 0; sl2 < sl; ++sl2) {
800  if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
801  }
802  vpx_video_writer_write_frame(outfile[sl],
803  (uint8_t *)(cx_pkt->data.frame.buf) + tot_size,
804  (size_t)(sizes[sl]), cx_pkt->data.frame.pts);
805 #else
806  for (sl2 = 0; sl2 <= sl; ++sl2) {
807  if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
808  }
809  if (tot_size > 0)
810  vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf,
811  (size_t)(tot_size), cx_pkt->data.frame.pts);
812 #endif // SIMULCAST_MODE
813  }
814  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
815  if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
816  for (tl = layer_id->temporal_layer_id; tl < enc_cfg->ts_number_layers;
817  ++tl) {
818  const int layer = sl * enc_cfg->ts_number_layers + tl;
819  ++rc->layer_tot_enc_frames[layer];
820  rc->layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
821  // Keep count of rate control stats per layer, for non-key
822  // frames.
823  if (tl == (unsigned int)layer_id->temporal_layer_id &&
824  !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
825  rc->layer_avg_frame_size[layer] += 8.0 * sizes[sl];
826  rc->layer_avg_rate_mismatch[layer] +=
827  fabs(8.0 * sizes[sl] - rc->layer_pfb[layer]) /
828  rc->layer_pfb[layer];
829  ++rc->layer_enc_frames[layer];
830  }
831  }
832  }
833  }
834 
835  // Update for short-time encoding bitrate states, for moving
836  // window of size rc->window, shifted by rc->window / 2.
837  // Ignore first window segment, due to key frame.
838  if (frame_cnt > (unsigned int)rc->window_size) {
839  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
840  if (cx_pkt->data.frame.spatial_layer_encoded[sl])
841  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
842  }
843  if (frame_cnt % rc->window_size == 0) {
844  rc->window_count += 1;
845  rc->avg_st_encoding_bitrate += sum_bitrate / rc->window_size;
846  rc->variance_st_encoding_bitrate +=
847  (sum_bitrate / rc->window_size) * (sum_bitrate / rc->window_size);
848  }
849  }
850 
851  // Second shifted window.
852  if (frame_cnt > (unsigned int)(rc->window_size + rc->window_size / 2)) {
853  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
854  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
855  }
856 
857  if (frame_cnt > (unsigned int)(2 * rc->window_size) &&
858  frame_cnt % rc->window_size == 0) {
859  rc->window_count += 1;
860  rc->avg_st_encoding_bitrate += sum_bitrate2 / rc->window_size;
861  rc->variance_st_encoding_bitrate +=
862  (sum_bitrate2 / rc->window_size) * (sum_bitrate2 / rc->window_size);
863  }
864  }
865 }
866 #endif
867 
/*
 * Entry point of the VP9 spatial SVC example encoder.
 *
 * Flow, as visible in this function:
 *   1. Zero the local state, set default input settings and parse the
 *      command line into app_input, svc_ctx and enc_cfg.
 *   2. Allocate the raw image buffer (skipped for Y4M input), initialize the
 *      SVC encoder and, when CONFIG_VP9_DECODER && !SIMULCAST_MODE, a VP9
 *      decoder that is used for test_decode() mismatch checks.
 *   3. Open the main IVF writer and, when svc_ctx.output_rc_stat is set, one
 *      extra IVF writer per spatial layer.
 *   4. Loop: read a frame, set layer ids / reference config in bypass mode,
 *      call vpx_svc_encode(), then drain the output packets (frame packets
 *      are written out, stats packets are passed to stats_write()).
 *   5. Print summaries, close the writers and release encoder resources.
 *
 * Returns EXIT_SUCCESS; failures are reported through die()/die_codec().
 *
 * NOTE(review): this listing carries embedded line numbers and is missing
 * several source lines (the numbering skips 987, 997, 998, 1000, 1003, 1007,
 * 1008 and 1081). The fragments left behind are flagged inline below and
 * must be restored from the upstream file before this text can compile.
 */
868 int main(int argc, const char **argv) {
869  AppInput app_input;
870  VpxVideoWriter *writer = NULL;
871  VpxVideoInfo info;
872  vpx_codec_ctx_t encoder;
873  vpx_codec_enc_cfg_t enc_cfg;
874  SvcContext svc_ctx;
875  vpx_svc_frame_drop_t svc_drop_frame;
876  uint32_t i;
877  uint32_t frame_cnt = 0;
878  vpx_image_t raw;
879  vpx_codec_err_t res;
880  int pts = 0; /* PTS starts at 0 */
881  int frame_duration = 1; /* 1 timebase tick per frame */
882  int end_of_stream = 0;
883  int frames_received = 0;
// NOTE(review): outfile, rc, layer_id, ref_frame_config, sl and framerate are
// declared only when OUTPUT_RC_STATS is non-zero, yet layer_id, rc and sl are
// also used outside OUTPUT_RC_STATS guards further down (the memset() calls,
// the frame-drop loop and the bypass-mode block).
884 #if OUTPUT_RC_STATS
885  VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL };
886  struct RateControlStats rc;
887  vpx_svc_layer_id_t layer_id;
888  vpx_svc_ref_frame_config_t ref_frame_config;
889  unsigned int sl;
890  double framerate = 30.0;
891 #endif
892  struct vpx_usec_timer timer;
// Accumulated vpx_usec_timer_elapsed() time spent inside vpx_svc_encode().
893  int64_t cx_time = 0;
894 #if CONFIG_INTERNAL_STATS
// NOTE(review): the fopen() result is not checked for NULL before the
// fprintf()/fclose() calls at the end of this function.
895  FILE *f = fopen("opsnr.stt", "a");
896 #endif
897 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
898  int mismatch_seen = 0;
899  vpx_codec_ctx_t decoder;
900 #endif
901  memset(&svc_ctx, 0, sizeof(svc_ctx));
902  memset(&app_input, 0, sizeof(AppInput));
903  memset(&info, 0, sizeof(VpxVideoInfo));
904  memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
905  memset(&rc, 0, sizeof(struct RateControlStats));
906  exec_name = argv[0];
907 
908  /* Setup default input stream settings */
909  app_input.input_ctx.framerate.numerator = 30;
910  app_input.input_ctx.framerate.denominator = 1;
911  app_input.input_ctx.only_i420 = 1;
912  app_input.input_ctx.bit_depth = 0;
913 
914  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
915 
916  // Y4M reader handles its own allocation.
917  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
918 // Allocate image buffer
919 #if CONFIG_VP9_HIGHBITDEPTH
920  if (!vpx_img_alloc(&raw,
921  enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
922  : VPX_IMG_FMT_I42016,
923  enc_cfg.g_w, enc_cfg.g_h, 32)) {
924  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
925  }
926 #else
927  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
928  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
929  }
930 #endif // CONFIG_VP9_HIGHBITDEPTH
931  }
932 
933  // Initialize codec
934  if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) !=
935  VPX_CODEC_OK)
936  die("Failed to initialize encoder\n");
937 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
// NOTE(review): no matching vpx_codec_destroy(&decoder) is visible anywhere
// in this function.
938  if (vpx_codec_dec_init(
939  &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0))
940  die("Failed to initialize decoder\n");
941 #endif
942 
943 #if OUTPUT_RC_STATS
944  rc.window_count = 1;
945  rc.window_size = 15; // Silence a static analysis warning.
946  rc.avg_st_encoding_bitrate = 0.0;
947  rc.variance_st_encoding_bitrate = 0.0;
948  if (svc_ctx.output_rc_stat) {
949  set_rate_control_stats(&rc, &enc_cfg);
// NOTE(review): g_timebase.den and g_timebase.num are listed as int in this
// file's cross-references, so this is an integer division and any fractional
// frame rate is truncated before being stored in the double.
950  framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
951  }
952 #endif
953 
// Describe the output stream for the IVF writer(s).
954  info.codec_fourcc = VP9_FOURCC;
955  info.frame_width = enc_cfg.g_w;
956  info.frame_height = enc_cfg.g_h;
957  info.time_base.numerator = enc_cfg.g_timebase.num;
958  info.time_base.denominator = enc_cfg.g_timebase.den;
959 
960  writer =
961  vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
962  if (!writer)
963  die("Failed to open %s for writing\n", app_input.output_filename);
964 
965 #if OUTPUT_RC_STATS
966  // Write out spatial layer stream.
967  // TODO(marpan/jianj): allow for writing each spatial and temporal stream.
968  if (svc_ctx.output_rc_stat) {
969  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
970  char file_name[PATH_MAX];
971 
// One file per spatial layer, named "<output_filename>_s<layer>.ivf".
972  snprintf(file_name, sizeof(file_name), "%s_s%d.ivf",
973  app_input.output_filename, sl);
974  outfile[sl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
975  if (!outfile[sl]) die("Failed to open %s for writing", file_name);
976  }
977  }
978 #endif
979 
980  // skip initial frames
981  for (i = 0; i < app_input.frames_to_skip; ++i)
982  read_frame(&app_input.input_ctx, &raw);
983 
984  if (svc_ctx.speed != -1)
985  vpx_codec_control(&encoder, VP8E_SET_CPUUSED, svc_ctx.speed);
986  if (svc_ctx.threads) {
// NOTE(review): listing line 987 (the start of the call that takes
// get_msb(svc_ctx.threads) as its last argument) is missing here.
988  get_msb(svc_ctx.threads));
989  if (svc_ctx.threads > 1)
990  vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 1);
991  else
992  vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 0);
993  }
994  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
995  vpx_codec_control(&encoder, VP9E_SET_AQ_MODE, 3);
// NOTE(review): listing lines 997-998, 1000, 1003 and 1007-1008 are missing;
// the body of the next `if` and the heads of the following calls are lost.
996  if (svc_ctx.speed >= 5)
999 
1001  app_input.inter_layer_pred);
1002 
1004 
1005  vpx_codec_control(&encoder, VP9E_SET_TUNE_CONTENT, app_input.tune_content);
1006 
1009 
// Apply the configured drop-frame threshold to every spatial layer.
1010  svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP;
1011  for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl)
1012  svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh;
1013  svc_drop_frame.max_consec_drop = INT_MAX;
1014  vpx_codec_control(&encoder, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
1015 
1016  // Encode frames
1017  while (!end_of_stream) {
1018  vpx_codec_iter_t iter = NULL;
1019  const vpx_codec_cx_pkt_t *cx_pkt;
1020  // Example patterns for bypass/flexible mode:
1021  // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3. Exact
1022  // to fixed SVC patterns. example_pattern = 1: 2 spatial and 2 temporal
1023  // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example
1024  // uses the extended API.
1025  int example_pattern = 0;
1026  if (frame_cnt >= app_input.frames_to_code ||
1027  !read_frame(&app_input.input_ctx, &raw)) {
1028  // We need one extra vpx_svc_encode call at end of stream to flush
1029  // encoder and get remaining data
1030  end_of_stream = 1;
1031  }
1032 
1033  // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
1034  // and the buffer indices for each spatial layer of the current
1035  // (super)frame to be encoded. The spatial and temporal layer_id for the
1036  // current frame also needs to be set.
1037  // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
1038  // mode to "VP9E_LAYERING_MODE_BYPASS".
1039  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
1040  layer_id.spatial_layer_id = 0;
1041  // Example for 2 temporal layers.
1042  if (frame_cnt % 2 == 0) {
1043  layer_id.temporal_layer_id = 0;
1044  for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
1045  layer_id.temporal_layer_id_per_spatial[i] = 0;
1046  } else {
1047  layer_id.temporal_layer_id = 1;
1048  for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
1049  layer_id.temporal_layer_id_per_spatial[i] = 1;
1050  }
1051  if (example_pattern == 1) {
1052  // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers.
1053  assert(svc_ctx.spatial_layers == 2);
1054  assert(svc_ctx.temporal_layers == 2);
1055  if (frame_cnt % 2 == 0) {
1056  // Spatial layer 0 and 1 are encoded.
1057  layer_id.temporal_layer_id_per_spatial[0] = 0;
1058  layer_id.temporal_layer_id_per_spatial[1] = 0;
1059  layer_id.spatial_layer_id = 0;
1060  } else {
1061  // Only spatial layer 1 is encoded here.
1062  layer_id.temporal_layer_id_per_spatial[1] = 1;
1063  layer_id.spatial_layer_id = 1;
1064  }
1065  }
1066  vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id);
1067  // TODO(jianj): Fix the parameter passing for "is_key_frame" in
1068  // set_frame_flags_bypass_model() for case of periodic key frames.
1069  if (example_pattern == 0) {
1070  set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id,
1071  svc_ctx.spatial_layers, frame_cnt == 0,
1072  &ref_frame_config);
1073  } else if (example_pattern == 1) {
1074  set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id,
1075  svc_ctx.spatial_layers, frame_cnt == 0,
1076  &ref_frame_config);
1077  }
1078  ref_frame_config.duration[0] = frame_duration * 1;
1079  ref_frame_config.duration[1] = frame_duration * 1;
1080 
// NOTE(review): listing line 1081 (the start of the call that receives
// &ref_frame_config) is missing here.
1082  &ref_frame_config);
1083  // Keep track of input frames, to account for frame drops in rate control
1084  // stats/metrics.
1085  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
1086  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
1087  layer_id.temporal_layer_id];
1088  }
1089  } else {
1090  // For the fixed pattern SVC, temporal layer is given by superframe count.
1091  unsigned int tl = 0;
1092  if (enc_cfg.ts_number_layers == 2)
1093  tl = (frame_cnt % 2 != 0);
1094  else if (enc_cfg.ts_number_layers == 3) {
1095  if (frame_cnt % 2 != 0) tl = 2;
1096  if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) tl = 1;
1097  }
1098  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl)
1099  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + tl];
1100  }
1101 
// Time only the encode call; a NULL image is passed once at end of stream.
1102  vpx_usec_timer_start(&timer);
1103  res = vpx_svc_encode(
1104  &svc_ctx, &encoder, (end_of_stream ? NULL : &raw), pts, frame_duration,
1105  svc_ctx.speed >= 5 ? VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
1106  vpx_usec_timer_mark(&timer);
1107  cx_time += vpx_usec_timer_elapsed(&timer);
1108 
1109  fflush(stdout);
1110  if (res != VPX_CODEC_OK) {
1111  die_codec(&encoder, "Failed to encode frame");
1112  }
1113 
// Drain every packet the encoder produced for this call.
1114  while ((cx_pkt = vpx_codec_get_cx_data(&encoder, &iter)) != NULL) {
1115  switch (cx_pkt->kind) {
1116  case VPX_CODEC_CX_FRAME_PKT: {
1117  SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
1118  if (cx_pkt->data.frame.sz > 0) {
1119  vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
1120  cx_pkt->data.frame.sz,
1121  cx_pkt->data.frame.pts);
1122 #if OUTPUT_RC_STATS
1123  if (svc_ctx.output_rc_stat) {
1124  svc_output_rc_stats(&encoder, &enc_cfg, &layer_id, cx_pkt, &rc,
1125  outfile, frame_cnt, framerate);
1126  }
1127 #endif
1128  }
1129  /*
1130  printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
1131  !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
1132  (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
1133  */
1134  if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
1135  si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
1136  ++frames_received;
1137 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
1138  if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf,
1139  (unsigned int)cx_pkt->data.frame.sz, NULL, 0))
1140  die_codec(&decoder, "Failed to decode frame.");
1141 #endif
1142  break;
1143  }
1144  case VPX_CODEC_STATS_PKT: {
1145  stats_write(&app_input.rc_stats, cx_pkt->data.twopass_stats.buf,
1146  cx_pkt->data.twopass_stats.sz);
1147  break;
1148  }
1149  default: { break; }
1150  }
1151 
1152 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
// NOTE(review): this check sits after the switch, so it runs for every
// packet kind and reads cx_pkt->data.frame even when the packet is not a
// frame packet -- confirm that is intended.
1153  vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id);
1154  // Don't look for mismatch on top spatial and top temporal layers as they
1155  // are non reference frames.
1156  if ((enc_cfg.ss_number_layers > 1 || enc_cfg.ts_number_layers > 1) &&
1157  !(layer_id.temporal_layer_id > 0 &&
1158  layer_id.temporal_layer_id == (int)enc_cfg.ts_number_layers - 1 &&
1159  cx_pkt->data.frame
1160  .spatial_layer_encoded[enc_cfg.ss_number_layers - 1])) {
1161  test_decode(&encoder, &decoder, frame_cnt, &mismatch_seen);
1162  }
1163 #endif
1164  }
1165 
// The final flush iteration does not count as an encoded frame.
1166  if (!end_of_stream) {
1167  ++frame_cnt;
1168  pts += frame_duration;
1169  }
1170  }
1171 
1172  printf("Processed %d frames\n", frame_cnt);
1173 
1174  close_input_file(&app_input.input_ctx);
1175 
1176 #if OUTPUT_RC_STATS
1177  if (svc_ctx.output_rc_stat) {
1178  printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
1179  printf("\n");
1180  }
1181 #endif
1182  if (vpx_codec_destroy(&encoder))
1183  die_codec(&encoder, "Failed to destroy codec");
1184  if (writer) {
1185  vpx_video_writer_close(writer);
1186  }
1187 #if OUTPUT_RC_STATS
1188  if (svc_ctx.output_rc_stat) {
1189  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
1190  vpx_video_writer_close(outfile[sl]);
1191  }
1192  }
1193 #endif
1194 #if CONFIG_INTERNAL_STATS
// NOTE(review): mismatch_seen is declared only under
// CONFIG_VP9_DECODER && !SIMULCAST_MODE, but this block is guarded by
// CONFIG_INTERNAL_STATS alone; f is also used without a NULL check.
1195  if (mismatch_seen) {
1196  fprintf(f, "First mismatch occurred in frame %d\n", mismatch_seen);
1197  } else {
1198  fprintf(f, "No mismatch detected in recon buffers\n");
1199  }
1200  fclose(f);
1201 #endif
// NOTE(review): frame_cnt is uint32_t, so on platforms with 32-bit int the
// product frame_cnt * 1000000 wraps once frame_cnt exceeds 4294 and the
// reported time per frame becomes wrong; the divisor is also zero when no
// frame was encoded.
1202  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
1203  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
1204  1000000 * (double)frame_cnt / (double)cx_time);
// Mirror of the allocation above: raw was only allocated for non-Y4M input.
1205  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1206  vpx_img_free(&raw);
1207  }
1208  // display average size, psnr
1209  vpx_svc_dump_statistics(&svc_ctx);
1210  vpx_svc_release(&svc_ctx);
1211  return EXIT_SUCCESS;
1212 }
vpx_fixed_buf_t twopass_stats
Definition: vpx_encoder.h:184
unsigned int ts_number_layers
Number of temporal coding layers.
Definition: vpx_encoder.h:646
Codec control function to disable increase Q on overshoot in CBR.
Definition: vp8cx.h:699
Codec control function to set encoder internal speed settings.
Definition: vp8cx.h:172
#define VPX_MAX_LAYERS
Definition: vpx_encoder.h:44
int reference_alt_ref[5]
Definition: vp8cx.h:909
Image Descriptor.
Definition: vpx_image.h:72
Describes the encoder algorithm interface to applications.
const char * vpx_codec_iface_name(vpx_codec_iface_t *iface)
Return the name for a given interface.
Codec control function to constrain the inter-layer prediction (prediction of lower spatial resolutio...
Definition: vp8cx.h:624
const char * vpx_codec_err_to_string(vpx_codec_err_t err)
Convert error number to printable string.
int lst_fb_idx[5]
Definition: vp8cx.h:899
Codec control function to set content type.
Definition: vp8cx.h:480
struct vpx_rational g_timebase
Stream timebase units.
Definition: vpx_encoder.h:345
Codec control function to set noise sensitivity.
Definition: vp8cx.h:438
unsigned int layer_target_bitrate[12]
Target bitrate for each spatial/temporal layer.
Definition: vpx_encoder.h:686
SVC_LAYER_DROP_MODE framedrop_mode
Definition: vp8cx.h:937
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: vpx_encoder.h:331
int den
Definition: vpx_encoder.h:222
Definition: vpx_encoder.h:150
int framedrop_thresh[5]
Definition: vp8cx.h:935
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: vpx_encoder.h:616
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: vpx_encoder.h:374
Encoder configuration structure.
Definition: vpx_encoder.h:270
int reference_golden[5]
Definition: vp8cx.h:908
The coded data for this stream is corrupt or incomplete.
Definition: vpx_codec.h:133
Codec control function to set row level multi-threading.
Definition: vp8cx.h:575
Codec control function to disable loopfilter.
Definition: vp8cx.h:708
Codec control function to set Max data rate for Intra frames.
Definition: vp8cx.h:274
Encoder output packet.
Definition: vpx_encoder.h:161
void * buf
Definition: vpx_encoder.h:99
unsigned int ts_rate_decimator[5]
Frame rate decimation factor for each temporal layer.
Definition: vpx_encoder.h:660
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: vpx_encoder.h:607
vp9 svc frame dropping parameters.
Definition: vp8cx.h:934
unsigned int g_profile
Bitstream profile to use.
Definition: vpx_encoder.h:297
Codec control function to set number of tile columns.
Definition: vp8cx.h:368
#define VPX_IMG_FMT_HIGHBITDEPTH
Definition: vpx_image.h:35
struct vpx_codec_cx_pkt::@1::@2 frame
#define VPX_SS_MAX_LAYERS
Definition: vpx_encoder.h:47
vpx_image_t * vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
Definition: vpx_image.h:42
unsigned int d_w
Definition: vpx_image.h:83
#define vpx_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for vpx_codec_dec_init_ver()
Definition: vpx_decoder.h:143
unsigned int g_w
Width of the frame.
Definition: vpx_encoder.h:306
int reference_last[5]
Definition: vp8cx.h:907
int update_buffer_slot[5]
Definition: vp8cx.h:902
Codec control function to set adaptive quantization mode.
Definition: vp8cx.h:415
vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline)
Decode data.
Codec control function to get svc layer ID.
Definition: vp8cx.h:488
unsigned int g_h
Height of the frame.
Definition: vpx_encoder.h:315
enum vpx_codec_cx_pkt_kind kind
Definition: vpx_encoder.h:162
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: vpx_encoder.h:393
vp9 svc layer parameters
Definition: vp8cx.h:883
Operation completed without error.
Definition: vpx_codec.h:95
void vpx_img_free(vpx_image_t *img)
Close an image descriptor.
vpx_img_fmt_t fmt
Definition: vpx_image.h:73
unsigned int rc_target_bitrate
Target data rate.
Definition: vpx_encoder.h:462
#define VPX_DL_REALTIME
deadline parameter analogous to VPx REALTIME mode.
Definition: vpx_encoder.h:978
int num
Definition: vpx_encoder.h:221
Definition: vpx_codec.h:223
vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int usage)
Get a default configuration.
Codec control function to set the frame flags and buffer indices for spatial layers. The frame flags and buffer indices are set using the struct vpx_svc_ref_frame_config defined below.
Definition: vp8cx.h:550
enum vpx_enc_pass g_pass
Multi-pass Encoding Mode.
Definition: vpx_encoder.h:360
Codec control function to set mode and thresholds for frame dropping in SVC. Drop frame thresholds ar...
Definition: vp8cx.h:633
#define VPX_DL_GOOD_QUALITY
deadline parameter analogous to VPx GOOD QUALITY mode.
Definition: vpx_encoder.h:980
unsigned int ss_number_layers
Number of spatial coding layers.
Definition: vpx_encoder.h:626
vpx_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: vpx_encoder.h:323
Provides definitions for using VP8 or VP9 encoder algorithm within the vpx Codec Interface.
Bypass mode. Used when application needs to control temporal layering. This will only work when the n...
Definition: vp8cx.h:789
Definition: vp8cx.h:922
vpx_codec_err_t
Algorithm return codes.
Definition: vpx_codec.h:93
const vpx_codec_cx_pkt_t * vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter)
Encoded data iterator.
union vpx_codec_cx_pkt::@1 data
int temporal_layering_mode
Temporal layering mode indicating which temporal layering scheme to use.
Definition: vpx_encoder.h:695
VP9 specific reference frame data struct.
Definition: vp8.h:110
int temporal_layer_id
Definition: vp8cx.h:886
vpx_codec_iface_t * vpx_codec_vp9_cx(void)
The interface to the VP9 encoder.
int max_consec_drop
Definition: vp8cx.h:938
Definition: vpx_encoder.h:236
int idx
Definition: vp8.h:111
#define vpx_codec_control(ctx, id, data)
vpx_codec_control wrapper macro
Definition: vpx_codec.h:407
vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
Destroy a codec instance.
unsigned int d_h
Definition: vpx_image.h:84
size_t sz
Definition: vpx_encoder.h:100
Definition: vpx_codec.h:221
vp9 svc frame flag parameters.
Definition: vp8cx.h:898
vpx_codec_err_t err
Definition: vpx_codec.h:203
Definition: vp8.h:55
Codec control function to set the threshold for MBs treated static.
Definition: vp8cx.h:205
int64_t duration[5]
Definition: vp8cx.h:910
#define VPX_FRAME_IS_KEY
Definition: vpx_encoder.h:118
Definition: vpx_codec.h:222
int alt_fb_idx[5]
Definition: vp8cx.h:901
const void * vpx_codec_iter_t
Iterator.
Definition: vpx_codec.h:190
Definition: vpx_encoder.h:149
unsigned int rc_2pass_vbr_maxsection_pct
Two-pass mode per-GOP maximum bitrate.
Definition: vpx_encoder.h:579
vpx_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: vpx_encoder.h:353
unsigned int rc_2pass_vbr_minsection_pct
Two-pass mode per-GOP minimum bitrate.
Definition: vpx_encoder.h:572
int gld_fb_idx[5]
Definition: vp8cx.h:900
Codec control function to set svc layer for spatial and temporal.
Definition: vp8cx.h:470
enum vpx_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: vpx_encoder.h:442
Definition: vpx_encoder.h:227
Codec context structure.
Definition: vpx_codec.h:200