A first version of the Sampler object.

manoskary · Sep 29, 2023 · fa98974 · fa98974
1 parent 8b12a07
commit fa98974
Showing 1 changed file with 44 additions and 79 deletions.
diff --git a/graphmuse/samplers/sampling_sketch.py b/graphmuse/samplers/sampling_sketch.py
@@ -13,98 +13,63 @@ def __init__(self, graphs, subgraph_size, subgraphs, num_layers=None):
 	def prepare_data(self):
 		graph_sizes = np.array([g.num_nodes for g in self.graphs])
 		multiples = graph_sizes // self.subgraph_size + 1
-        # Create a list of indices repeating each graph the appropriate number of times
-        indices = np.concatenate([np.repeat(i, m) for i, m in enumerate(multiples)])
+		indices = np.concatenate([np.repeat(i, m) for i, m in enumerate(multiples)])
+		# indices lists each graph index i repeated multiples[i] times (TODO: check the documentation for the precise format).
 
+	def random_score_region(self, graph_idx, check_possibility=True):
+		if graph_idx in self.onsets.keys():
+			onsets = self.onsets[graph_idx]
+			onset_count = self.onset_count[graph_idx]
+		else:
+			onsets = self.graphs.note_array['onset_div'].astype(np.int32)
+			uniques, onset_count = np.unique(onsets, return_counts=True)
+			self.onsets[graph_idx] = onsets
+			self.onset_count[graph_idx] = onset_count
 
+		# in order to avoid handling the special case where a region is sampled that reaches to the end of 'onsets', we simply extend the possible values
+		indices = np.concatenate([self.subgraph_size,[len(onsets)]])
 
+		if check_possibility:
+			if (np.diff(indices)>self.subgraph_size).all():
+				raise ValueError("by including all notes with the same onset, the budget is always exceeded")
 
+		# Since we appended the last element ourselves and it isn't a valid index,
+		# we only sample from the entries before it.
+		# Using a random permutation isn't strictly necessary; it just avoids retrying a start that was already sampled.
+		for idx in np.random.permutation(len(indices)-1):
+			samples_start = indices[idx]
 
+			if samples_start+self.subgraph_size>=len(onsets):
+				return (samples_start,len(onsets))
 
-def random_score_region(onsets, budget, check_possibility=True):
-	_, indices = np.unique(onsets,return_index=True)
-
-	# in order to avoid handling the special case where a region is sampled that reaches to the end of 'onsets', we simply extend the possible values
-	indices = np.concatenate([indices,[len(onsets)]])
-
-	if check_possibility:
-		if (np.diff(indices)>budget).all():
-			raise ValueError("by including all notes with the same onset, the budget is always exceeded")
-
-	# since we added the last element ourselves and it isn't a valid index,
-	# we only sample excluding the last element
-	# using a random permutation isn't necessarily, it just avoids sampling a previous sample
-	for idx in numpy.random.permutation(len(indices)-1):
-		samples_start = indices[idx]
-
-		if samples_start+budget>=len(onsets):
-			return (samples_start,len(onsets))
-
-		samples_end = samples_start+budget
-
-		while samples_end-1>=samples_start and onsets[samples_end]==onsets[samples_end-1]:
-			samples_end-=1
-
-		if samples_start<samples_end:
-			return (samples_start, samples_end)
-
-
-	if check_possibility:
-		assert False, "a result should be possible, according to the check above, however, no result exists."
-	else:
-		raise ValueError("by including all notes with the same onset, the budget is always exceeded")
-
-
-
-
-class Graph:
-	def __init__(self,note_array):
-		self.note_array = note_array
-
-	def size(self):
-		return len(self.note_array['onset_div'])
-
-
-
-def musical_sampling(graphs, max_subgraph_size, subgraph_count, check_possibility=True):
-	# we want to sample from the array 'graphs' proportional to the size of the graphs in the array
-	# so we need to pre-compute a probability distribution for that
-	graph_probs = numpy.empty(len(graphs))
-
-	total_size = 0
-
-	for i,g in enumerate(graphs):
-		graph_probs[i] = g.size()
-		total_size += graph_probs[i]
+			samples_end = samples_start+self.subgraph_size
 
-	graph_probs/=total_size
+			while samples_end-1>=samples_start and onsets[samples_end]==onsets[samples_end-1]:
+				samples_end-=1
 
-	# main loop
-	subgraphs = []
+			if samples_start<samples_end:
+				return (samples_start, samples_end)
 
-	for _ in range(subgraph_count):
-		g_idx = numpy.random.choice(len(graphs), p=graph_probs)
 
-		if graphs[g_idx].size()<=max_subgraph_size:
-			(l,r)=(0,graphs[g_idx].size())
+		if check_possibility:
+			assert False, "a result should be possible, according to the check above, however, no result exists."
 		else:
-			(l,r)=random_score_region(graphs[g_idx].note_array['onset_div'], max_subgraph_size, check_possibility)
-			assert r-l<=max_subgraph_size
-
-		subgraphs.append((g_idx,(l,r)))
-
-	return subgraphs
-
+			raise ValueError("by including all notes with the same onset, the budget is always exceeded")
 
+	def musical_sampling(self, g_idxs, check_possibility=True):
+		# For each requested graph index in g_idxs, pick a region (l, r) no longer than self.subgraph_size:
+		# the whole graph if it fits, otherwise a randomly sampled score region.
+		graphs = [self.graphs[i] for i in g_idxs]
+		subgraphs = []
+		for i,g in enumerate(graphs):
 
-note_arrays = [sorted(numpy.random.randint(0,20,size=numpy.random.randint(1,20))) for _ in range(10)]
+			if g.size() <= self.subgraph_size:
+				(l, r) = (0, g.size())
+			else:
+				(l, r) = self.random_score_region(g_idxs[i], check_possibility)
+				assert r - l <= self.subgraph_size
 
-for i,n in enumerate(note_arrays):
-	print(i,":",n)
-print("-------------------------------------------------")
-graphs = [Graph({'onset_div':n}) for n in note_arrays]
+			subgraphs.append((g_idxs[i], (l, r)))
 
-y=musical_sampling(graphs, 10, 7)
+		return subgraphs
 
-for g_idx,(l,r) in y:
-	print(g_idx,":",l, r, note_arrays[g_idx][l:r])