Commit
fixed conflicts
happycube committed Mar 10, 2024
2 parents 0d42756 + 40aecfb commit ac5ab5a
Showing 4 changed files with 100 additions and 71 deletions.
96 changes: 67 additions & 29 deletions lddecode/core.py
@@ -27,7 +27,7 @@
from .utils import LRUupdate, clb_findbursts, angular_mean_helper, phase_distance
from .utils import build_hilbert, unwrap_hilbert, emphasis_iir, filtfft
from .utils import fft_do_slice, fft_determine_slices, StridedCollector, hz_to_output_array
from .utils import Pulse, nb_std, nb_gt, n_ornotrange
from .utils import Pulse, nb_std, nb_gt, n_ornotrange, nb_concatenate

try:
# If Anaconda's numpy is installed, mkl will use all threads for fft etc
@@ -43,7 +43,7 @@
# and ld-decode. Probably should just bring all logging in here...
logger = None

# If profiling is not enabled, make it a pass-through function
# If profiling is not enabled, make it a pass-through wrapper
try:
profile
except:
@@ -981,8 +981,8 @@ def __init__(
self.request = 0
self.ended = False

self.deqeue_thread = threading.Thread(target=self.dequeue, daemon=True)
num_worker_threads = max(num_worker_threads - 1, 1)
self.deqeue_thread = threading.Thread(target=self.dequeue, daemon=True)
self.num_worker_threads = num_worker_threads

for i in range(num_worker_threads):
t = threading.Thread(
@@ -1035,13 +1035,18 @@ def apply_newparams(self, newparams):

self.rf.computefilters()

def worker(self):
def worker(self, return_on_empty=False):
''' return_on_empty is used when running non-threaded so this can be
directly called '''
blocksrun = 0
blockstime = 0

rf = RFDecode(**self.rf_args)

while True:
if return_on_empty and self.q_in.qsize() == 0:
return

item = self.q_in.get()

if item is None or item[0] == "END":
@@ -1077,7 +1082,7 @@ def worker(self):
elif item[0] == "NEWPARAMS":
self.apply_newparams(item[1])


@profile
def doread(self, blocknums, MTF, redo=False, prefetch=False):
need_blocks = []
queuelist = []
@@ -1197,6 +1202,7 @@ def dequeue(self):
self.rf.blockcut : -self.rf.blockcut_end
]

@profile
def read(self, begin, length, MTF=0, getraw = False, forceredo=False):
# transpose the cache by key, not block
# This is a list of entries in the output from the threaded
@@ -1231,6 +1237,9 @@ def read(self, begin, length, MTF=0, getraw = False, forceredo=False):
return rv

while need_blocks is not None and len(need_blocks):
if self.num_worker_threads == 0:
self.worker(return_on_empty=True)

self.q_out_event.wait(.01)
need_blocks = self.doread(toread, MTF)
if need_blocks:
@@ -1252,7 +1261,7 @@

rv = {}
for k in t.keys():
rv[k] = np.concatenate(t[k]) if len(t[k]) else None
rv[k] = nb_concatenate(t[k]) if len(t[k]) else None

if rv["audio"] is not None:
rv["audio_phase1"] = rv["audio"]
@@ -1445,6 +1454,7 @@ def __init__(
initphase=False,
fields_written=0,
readloc=0,
use_threads=True
):
self.rawdata = decode["input"]
self.data = decode
@@ -1474,7 +1484,9 @@ def __init__(
# this is eventually set to 262/263 and 312/313 for audio timing
self.linecount = None

#@profile
self.use_threads = use_threads

@profile
def process(self):
self.linelocs1, self.linebad, self.nextfieldoffset = self.compute_linelocs()
#print(self.readloc, self.linelocs1, self.nextfieldoffset)
@@ -2072,6 +2084,7 @@ def getLine0(self, validpulses, meanlinelen):
return line0loc_local, self.vblank_next, isFirstField_local
elif line0loc_prev is not None:
new_sync_confidence = np.max(conf_prev - 10, 0)
new_sync_confidence = max(new_sync_confidence, 10)
self.sync_confidence = min(self.sync_confidence, new_sync_confidence)
return line0loc_prev, self.vblank_next, isFirstField_prev
elif line0loc_next is not None:
@@ -2485,16 +2498,21 @@ def downscale(
audio_thread = None
if audio != 0 and self.rf.decode_analog_audio:
audio_rv = {}
audio_thread = threading.Thread(target=downscale_audio, args=(
dsa_args = (
self.data["audio"],
lineinfo,
self.rf,
self.linecount,
audio_offset,
audio,
audio_rv)
)
audio_thread.start()

if self.use_threads:
audio_thread = threading.Thread(target=downscale_audio, args=dsa_args)
audio_thread.start()
else:
# return values will still be in audio_rv later
downscale_audio(*dsa_args)

dsout = np.zeros((linesout * outwidth), dtype=np.double)
# self.lineoffset is an adjustment for 0-based lines *before* downscaling so add 1 here
@@ -2532,8 +2550,10 @@ def downscale(
dsout = self.hz_to_output(dsout)
self.dspicture = dsout

if audio_thread:
audio_thread.join()
if audio != 0 and self.rf.decode_analog_audio:
if audio_thread:
audio_thread.join()

self.dsaudio = audio_rv["dsaudio"]
self.audio_next_offset = audio_rv["audio_next_offset"]

@@ -3176,6 +3196,7 @@ def get_burstlevel(self, l, linelocs=None):
# Should we warn here? (Provided this can actually occur.)
return 0

@profile
def compute_burst_offsets(self, linelocs):
rising_sum = 0
adjs = {}
@@ -3195,6 +3216,7 @@ def compute_burst_offsets(self, linelocs):

return field14, adjs

@profile
def refine_linelocs_burst(self, linelocs=None):
if linelocs is None:
linelocs = self.linelocs2
@@ -3273,6 +3295,7 @@ def apply_offsets(self, linelocs, phaseoffset, picoffset=0):
def __init__(self, *args, **kwargs):
super(FieldNTSC, self).__init__(*args, **kwargs)

@profile
def process(self):
super(FieldNTSC, self).process()

@@ -3344,6 +3367,8 @@ def __init__(
self.lpf.add_function(Field.process)
self.lpf.add_function(Field.compute_linelocs)
self.lpf.add_function(Field.getpulses)
self.lpf.add_function(DemodCache.read)
#self.lpf.add_function(self.decodefield)

self.analog_audio = int(analog_audio)
self.digital_audio = digital_audio
@@ -3479,6 +3504,9 @@ def close(self):

self.demodcache.end()

if self.use_profiler:
self.lpf.print_stats()

def roughseek(self, location, isField=True):
self.prevPhaseID = None

@@ -3652,9 +3680,12 @@ def decodefield(self, start, mtf_level, prevfield=None, initphase=False, redo=Fa

@profile
def readfield(self, initphase=False):
done = False
done = False
adjusted = False
redo = None
redo = None
df_args = None
f = None
offset = 0

if len(self.fieldstack) >= 2:
# XXX: Need to cut off the previous field here, since otherwise
@@ -3674,15 +3705,16 @@
# Only allow one redo, no matter what
done = True
redo = None
elif self.decodethread and self.decodethread.ident:
self.decodethread.join()
self.decodethread = None

f, offset = self.threadreturn['field'], self.threadreturn['offset']
else: # assume first run
f = None
offset = 0

else:
if self.decodethread and self.decodethread.ident:
self.decodethread.join()
self.decodethread = None

# In non-threaded mode self.threadreturn was filled earlier...
# ... but if the first call, this is empty
if len(self.threadreturn) > 0:
f, offset = self.threadreturn['field'], self.threadreturn['offset']

# Start new thread
self.threadreturn = {}
if f and f.valid:
@@ -3697,8 +3729,11 @@

df_args = (toffset, self.mtf_level, prevfield, initphase, False, self.threadreturn)

self.decodethread = threading.Thread(target=self.decodefield, args=df_args)
self.decodethread.start()
if self.numthreads != 0:
self.decodethread = threading.Thread(target=self.decodefield, args=df_args)
self.decodethread.start()
else:
self.decodefield(*df_args)
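
# Illustration only, not part of this commit: decodefield() stores its results
# in the self.threadreturn dict rather than returning them, so the branch above
# can either hand it to a Thread (numthreads != 0) or call it inline
# (numthreads == 0); the next readfield() pass joins the thread, if any, and
# reads the dict the same way in both modes.  A standalone sketch of that
# pattern (all names here are hypothetical):
import threading

def _decode_one(job, out):
    out["field"] = job * 2        # worker stores results instead of returning them

ret, numthreads = {}, 0
if numthreads != 0:
    t = threading.Thread(target=_decode_one, args=(21, ret))
    t.start()
    t.join()                      # the real code joins on the following readfield() pass
else:
    _decode_one(21, ret)          # non-threaded: ret is filled immediately
assert ret["field"] == 42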

# process previous run
if f:
@@ -3708,6 +3743,8 @@
self.fieldstack.insert(0, None)

if f and f.valid:
# Downscaling is time consuming, but currently things are
# blocking on the decode thread started above finishing
picture, audio, efm = f.downscale(
linesout=self.output_lines,
final=True,
@@ -3760,9 +3797,10 @@ def readfield(self, initphase=False):
else:
done = True
fieldlength = f.linelocs[self.output_lines] - f.linelocs[0]
minlength = (f.inlinelen * self.output_lines) - 2
if ((f.sync_confidence < 50) and (fieldlength < minlength)):
logger.warning("WARNING: Player skip detected, check output in this area")
fieldlength /= f.inlinelen
if ((f.sync_confidence < 50) and not
inrange(fieldlength, self.output_lines - 2, self.output_lines + 2)):
logger.warning("WARNING: Possible player skip detected - check output")

self.fieldstack.insert(0, f)

8 changes: 8 additions & 0 deletions lddecode/main.py
@@ -269,6 +269,13 @@ def main(args=None):
default=None,
help="Analog audio filter width",
)
parser.add_argument(
"--use_profiler",
action="store_true",
default=False,
help="Enable line_profiler on select functions",
)
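
# Illustration only, not part of this commit: a minimal sketch of the
# line_profiler wiring that --use_profiler appears to toggle (core.py registers
# functions with lpf.add_function(...) and dumps results via lpf.print_stats()
# in close()).  It assumes lpf there is a line_profiler.LineProfiler instance;
# the function below is hypothetical.
from line_profiler import LineProfiler

def hot_loop(n):
    total = 0
    for i in range(n):
        total += i * i
    return total

lpf = LineProfiler()
lpf.add_function(hot_loop)   # register a function for per-line timing
lpf.enable_by_count()        # start collecting
hot_loop(100_000)
lpf.disable_by_count()       # stop collecting
lpf.print_stats()            # print per-line timings to stdout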


args = parser.parse_args(args)
# print(args)
@@ -295,6 +302,7 @@ def main(args=None):
"MTF_offset": args.MTF_offset,
"audio_filterwidth": args.audio_filterwidth,
"AC3": args.AC3,
"use_profiler": args.use_profiler,
}

if vid_standard == "NTSC" and args.NTSC_color_notch_filter:
63 changes: 23 additions & 40 deletions lddecode/utils.py
@@ -23,6 +23,13 @@
except ImportError:
pass

# If profiling is not enabled, make it a pass-through wrapper
try:
profile
except:
def profile(fn):
return fn
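
# Illustration only, not part of this commit: when the script is not run under
# kernprof/line_profiler (which injects a real `profile` builtin), the fallback
# above turns @profile into a no-op, so decorated functions behave exactly as
# if they were undecorated.
@profile
def _profile_noop_demo(x):
    return x

assert _profile_noop_demo(3) == 3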

# This runs a cubic scaler on a line.
# originally from https://www.paulinternet.nl/?page=bicubic
@njit(nogil=True, cache=True)
@@ -262,56 +269,20 @@ def unpack_data_4_40(indata, readlen, offset):
# Could've used dtype argument to right_shift but numba didn't like that.
#
indatai16 = indata.astype(np.uint16)
unpacked[0::4] = (indatai16[0::5] << 2) | ((indata[1::5] >> 6) & 0x03)

unpacked[0::4] = ( indatai16[0::5] << 2) | ((indata[1::5] >> 6) & 0x03)
unpacked[1::4] = ((indatai16[1::5] & 0x3F) << 4) | ((indata[2::5] >> 4) & 0x0F)
unpacked[2::4] = ((indatai16[2::5] & 0x0F) << 6) | ((indata[3::5] >> 2) & 0x3F)
unpacked[3::4] = ((indatai16[3::5] & 0x03) << 8) | indata[4::5]

# convert back to original DdD 16-bit format (signed 16-bit, left shifted)
rv_unsigned = unpacked[offset : offset + readlen]
rv_signed = np.left_shift(rv_unsigned - 512, 6).astype(np.int16)

# # Original implementation.
# unpacked = np.zeros(readlen + 4, dtype=np.uint16)
# # we need to load the 8-bit data into the 16-bit unpacked for left_shift to work
# # correctly...
# unpacked[0::4] = indata[0::5]
# np.left_shift(unpacked[0::4], 2, out=unpacked[0::4])
# np.bitwise_or(
# unpacked[0::4],
# np.bitwise_and(np.right_shift(indata[1::5], 6), 0x03),
# out=unpacked[0::4],
# )

# unpacked[1::4] = np.bitwise_and(indata[1::5], 0x3F)
# np.left_shift(unpacked[1::4], 4, out=unpacked[1::4])
# np.bitwise_or(
# unpacked[1::4],
# np.bitwise_and(np.right_shift(indata[2::5], 4), 0x0F),
# out=unpacked[1::4],
# )

# unpacked[2::4] = np.bitwise_and(indata[2::5], 0x0F)
# np.left_shift(unpacked[2::4], 6, out=unpacked[2::4])
# np.bitwise_or(
# unpacked[2::4],
# np.bitwise_and(np.right_shift(indata[3::5], 2), 0x3F),
# out=unpacked[2::4],
# )

# unpacked[3::4] = np.bitwise_and(indata[3::5], 0x03)
# np.left_shift(unpacked[3::4], 8, out=unpacked[3::4])
# np.bitwise_or(unpacked[3::4], indata[4::5], out=unpacked[3::4])

# # convert back to original DdD 16-bit format (signed 16-bit, left shifted)
# rv_unsigned = unpacked[offset : offset + readlen].copy()
# rv_signed = np.left_shift(rv_unsigned.astype(np.int16) - 512, 6)
rv_signed = np.left_shift(rv_unsigned - 512, 6).astype(np.int16)

return rv_signed
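
# Illustration only, not part of this commit: each 5-byte group carries four
# 10-bit samples.  A pure-Python check of the bit layout used above, for the
# bytes ff c0 00 00 01, which should unpack to 1023, 0, 0, 1:
_b = [0xFF, 0xC0, 0x00, 0x00, 0x01]
assert [( _b[0]         << 2) | (_b[1] >> 6),
        ((_b[1] & 0x3F) << 4) | (_b[2] >> 4),
        ((_b[2] & 0x0F) << 6) | (_b[3] >> 2),
        ((_b[3] & 0x03) << 8) |  _b[4]] == [1023, 0, 0, 1]
# Each 10-bit value is then re-centred into DdD's signed 16-bit form,
# e.g. (1023 - 512) << 6 == 32704 and (0 - 512) << 6 == -32768.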


# The bit twiddling is a bit more complex than I'd like... but eh. I think
# it's debugged now. ;)
@profile
def load_packed_data_4_40(infile, sample, readlen):
"""Load data from packed DdD format (4 x 10-bits packed in 5 bytes)"""
start = (sample // 4) * 5
@@ -1032,6 +1003,18 @@ def LRUupdate(l, k):
def nb_median(m):
return np.median(m)

# Enabling nogil here kills performance - cache issues?
@njit(cache=True,nogil=False)
def nb_concatenate(m):
tlen = sum([len(i) for i in m])

out = np.empty(tlen, dtype=m[0].dtype)
pos = 0
for i in m:
out[pos : pos + len(i)] = i
pos += len(i)

return out
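
# Illustration only, not part of this commit: nb_concatenate is a numba-compiled
# stand-in for np.concatenate for the case the decoder needs -- a list of 1-D
# arrays that all share one dtype.  Quick equivalence check (np is numpy, as
# imported at the top of utils.py; the first call pays numba's compile cost):
_chunks = [np.arange(5, dtype=np.float32), np.arange(3, dtype=np.float32)]
assert np.array_equal(nb_concatenate(_chunks), np.concatenate(_chunks))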

@njit(cache=True,nogil=True)
def nb_round(m):
4 changes: 2 additions & 2 deletions scripts/encode-ntsc-video
@@ -3,5 +3,5 @@
# Simple 1080p60 encoding script, intended for use with video and/or mixed footage. Note that this is not the
# highest quality solution

ld-process-efm --input $1.efm --audio $1.efm.pcm
ld-dropout-correct --output-json $1-dropout.tbc.json $1.tbc - | ld-chroma-decoder -f ntsc3d --input-json $1.tbc.json - - | ffmpeg -f s16le -ar 44.1k -ac 2 -i $1.efm.pcm -f rawvideo -r 30000/1001 -pix_fmt rgb48 -s 760x488 -i /dev/stdin -vf bwdif,scale=1440x1080 -b:v 15000k -aspect 4:3 -acodec flac -s 1440x1080 -vcodec mpeg4 $1.mkv
#ld-process-efm --input $1.efm --audio $1.efm.pcm
ld-dropout-correct --output-json $1-dropout.tbc.json $1.tbc - | ld-chroma-decoder -f ntsc3d --input-json $1.tbc.json - - | ffmpeg -f s16le -ar 44.1k -ac 2 -i $1.pcm -f rawvideo -r 30000/1001 -pix_fmt rgb48 -s 760x488 -i /dev/stdin -vf bwdif,scale=1440x1080 -b:v 15000k -aspect 4:3 -acodec flac -s 1440x1080 -vcodec mpeg4 $1.mkv
