I tried to use the following program to extract slides from a video of a powerpoint:
import os import subprocess import numpy as np import pandas as pd import av def genweights(frame: np.ndarray) -> np.ndarray: """Creqte a meshgrid for weights as exp(-z^2)""" ymax, xmax = frame.shape normfactor = ymax // 2 # y is -1 to 1; x is in same scale but only on rightmost 2/3 y = np.arange(-ymax//2, ymax-ymax//2) / normfactor x = np.arange(-xmax*2//3, xmax-xmax*2//3) / normfactor xx, yy = np.meshgrid(x, y) weights = np.exp(-(xx**2 + yy**2)) weights[:, :xmax//3] = 0 # left one-third = ignore return weights def penalty(prev, this, weight) -> np.ndarray: """Compute the pixel-based penalty of two different images""" this = np.square(this > 128) # convert grayscale into B&W prev = np.square(prev > 128) return np.square(this - prev) * weights MOVIE = "hesabi.mp4" next_t = 2 incr = 2 prev = weight = None threshold = 1e-3 files = [] container = av.open(MOVIE) container.seek(0) for frame in container.decode(video=0): if frame.is_corrupt or frame.time < next_t: continue next_t += incr # Extract this = frame.to_ndarray(format="gray") if prev is None: # initialization for comparison, and skip this frame weights = genweights(this) # reusable weight matrix prev = np.zeros_like(this) continue # Compare and save frame as JPEG image score = np.mean(penalty(prev, this, weights)) prev = this if score > threshold: filename = "screen-{:02.0f}m{:.0f}s.jpg".format(frame.time//60, frame.time%60) files.append(filename) frame.to_image().save(filename) # convert image into PDF and remove temp files subprocess.run(["convert"]+files+["deck.pdf"]) for f in files: os.unlink(f)
However, when I run it I get the following error:
Traceback (most recent call last): File "scap.py", line 47, in <module> score = np.mean(penalty(prev, this, weights)) File "scap.py", line 24, in penalty return np.square(this - prev) * weights ValueError: operands could not be broadcast together with shapes (720,1280) (720,1281)
Please guide me how I can solve it.
Advertisement
Answer
Integer division will round down rather than rounding towards zero. If you replace:
xmax-xmax*2//3
with:
xmax + (-xmax)*2//3
this should solve it.
Parentheses shown for clarity but are not needed.
>>> xmax 1280 >>> np.arange(-xmax*2//3, xmax - xmax*2//3).shape (1281,) >>> np.arange(-xmax*2//3, xmax + -xmax*2//3).shape (1280,)
You should also do similarly with the ymax
. Instead of:
np.arange(-ymax//2, ymax-ymax//2)
use:
np.arange(-ymax//2, ymax + -ymax//2)