wavesets and waveset clustering using k-means
parent
35f5ecbf38
commit
93730257df
@ -0,0 +1 @@
|
||||
*.wav
|
||||
@ -0,0 +1,9 @@
|
||||
(
// Minimal test synth: a sine oscillator duplicated to both output channels.
// \freq is an audio-rate NamedControl defaulting to 440 Hz.
// (Removed the unused locals `env` and `amp` from the original.)
SynthDef(\simple, {
	var sig;
	sig = SinOsc.ar(\freq.ar(440));
	Out.ar(0, sig.dup);
}).add;
)
|
||||
@ -0,0 +1,149 @@
|
||||
// Load a mono source sample into a buffer (evaluate ONE of the two lines;
// the second assignment overwrites the first).
b = Buffer.read(s, "/home/lcoogan/snd/samples/freesound/vocals/ymaaela/attribution/330909__ymaaela__discordant-clip.mono.wav");
b = Buffer.read(s, "/home/lcoogan/snd/samples/freesound/vocals/ymaaela/333264__ymaaela__female-vocal-cut-ups-collage.mono.wav");

// Audition the loaded buffer.
b.play;

// Boot the server first if it is not already running.
s.boot
|
||||
|
||||
|
||||
|
||||
// Make a Wavesets analysis from a soundfile (evaluate ONE of the two lines).
w = Wavesets.from("/home/lcoogan/snd/samples/freesound/vocals/ymaaela/attribution/330909__ymaaela__discordant-clip.mono.wav");
w = Wavesets.from("/home/lcoogan/snd/samples/freesound/vocals/ymaaela/333264__ymaaela__female-vocal-cut-ups-collage.mono.wav");

w.dump; // contains mainly analysis data (lengths, amps, zero crossings, ...)

w.plot(200, 1); // plot a single waveset
// same idea done by hand: slice the signal between two zero crossings
w.signal.copyRange(w.xings[600], w.xings[601]).plot;

w.plot(600, 1); // a single waveset
w.plot(600, 5); // a group of five contiguous wavesets

w.buffer;      // the Buffer holding the analysed signal
w.buffer.play; // audition the raw source
|
||||
|
||||
// build and add the SynthDefs the Wavesets quark uses for playback
Wavesets.prepareSynthDefs;

// eventFor returns an event that can be played directly
w.eventFor(startWs: 600, length: 5, repeats: 2).postln.play;
w.eventFor(startWs: 600, length: 2, playRate: 1, repeats: 5).postln.play;
w.eventFor(startWs: 600, length: 2, playRate: 0.5, repeats: 5).postln.play;
// positional form — NOTE(review): verify argument order against Wavesets:eventFor
w.eventFor(700, 20, 5, 1).play;
|
||||
|
||||
(
// Step through the file playing short waveset grains at a random
// playback rate, randomly panned, waiting out each event's duration.
fork {
	666.do { |step|
		var event = w.eventFor(step * 5, 2, 5, exprand(0.5, 1.0));
		event.put(\pan, 1.0.rand2).play;
		event.sustain.wait;
	}
};
)
|
||||
|
||||
|
||||
|
||||
(
// Convenience setup: alias the analysis buffer and (re)build the
// Wavesets playback SynthDefs.
b = w.buffer;
Wavesets.prepareSynthDefs;
)
|
||||
|
||||
(
// Look up frame data (start, length, sustain) for 5 wavesets beginning
// at index 150 and play them with the \wvst0 synth, sustaining for
// `repeats` passes.
// FIX: removed a stray trailing ')' that left this region unbalanced.
var start, length, sustain, repeats = 20;
#start, length, sustain = w.frameFor(150, 5);

( instrument: \wvst0, bufnum: b.bufnum, amp:1,
	start: start, length: length, sustain: sustain * repeats
).play;
)
|
||||
|
||||
|
||||
|
||||
|
||||
// Where installed Quarks (e.g. Wavesets) live on disk.
Quarks.directory;

// Print length and amplitude for every analysed waveset.
w.lengths.do({ |len, i|
	("Waveset %" ++ i ++ ": length = " ++ len ++ ", amp = " ++ w.amps[i]).postln;
});

w.plot(0, 1); // arg[0] = waveset index, arg[1] = waveset n
|
||||
|
||||
(
// Crude 1-D binning "clustering": normalize both features, project each
// (weighted length, amp) pair onto a single value, and floor it into bins.
~lens = w.lengths.normalize;
~amps = w.amps.normalize;

// One [weighted length, amp] feature vector per waveset.
~features = Array.fill(w.lengths.size, { |i|
	[~lens[i] * 5, ~amps[i]]
});

~k = 10;
~clusters = Dictionary.new;
~features.do { |vec, i|
	var bin = ((vec[0] + vec[1]) * 0.5 * ~k).floor;
	~clusters[bin] = (~clusters[bin] ?? { [] }) ++ [i];
};

// Report cluster membership.
~clusters.keysValuesDo { |bin, members| ("Cluster " ++ bin ++ ": " ++ members).postln; };
)
|
||||
|
||||
|
||||
(
// Audition one representative (the first member) from each cluster.
~clusters.keysDo { |key|
	var member = ~clusters[key][0];
	var event = w.eventFor(startWs: member, length: 1, repeats: 1);
	event.put(\pan, 1.0.rand2); // random stereo placement
	event.play;
};
)
|
||||
|
||||
|
||||
(
// Map every cluster to a representative waveset (its first member).
~reps = Dictionary.new;
~clusters.keysDo { |key|
	~reps[key] = ~clusters[key][0]; // could improve: pick by centroid distance
};

// Replay the file in original order, substituting each waveset with its
// cluster's representative.
fork {
	w.lengths.size.do { |i|
		var vec = [~lens[i] * 5, ~amps[i]];
		var bin = ((vec[0] + vec[1]) * 0.5 * ~k).floor;
		var repIndex = ~reps[bin] ?? { i }; // fall back to the original index
		var event = w.eventFor(repIndex, 1, 1, 1);
		event.put(\pan, 0).play;
		event.sustain.wait;
	};
};
)
|
||||
|
||||
|
||||
|
||||
|
||||
(
// Subtle morph: keep the original waveset order but nudge playback rate
// and pan position according to each waveset's cluster bin.
fork {
	w.lengths.size.do { |i|
		var vec = [~lens[i] * 5, ~amps[i]];
		var bin = ((vec[0] + vec[1]) * 0.5 * ~k).floor;
		var event = w.eventFor(i, 1, 1, 1 + (bin * 0.01));
		event.put(\pan, bin.linlin(0, ~k, -1, 1));
		event.play;
		event.sustain.wait;
	};
};
)
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from sklearn.cluster import KMeans
|
||||
from scipy.signal import butter, lfilter
|
||||
|
||||
def highpass_filter(audio, sr, cutoff=20.0):
    """First-order Butterworth high-pass to strip DC / subsonic drift.

    audio: 1-D sample array; sr: sample rate (Hz); cutoff: -3 dB point (Hz).
    Returns the filtered signal, same length as the input.
    """
    nyquist = sr / 2
    b, a = butter(1, cutoff / nyquist, btype='highpass')
    return lfilter(b, a, audio)
|
||||
|
||||
def find_upward_zero_crossings(signal):
    """Indices where the signal passes from negative to non-negative."""
    prev, curr = signal[:-1], signal[1:]
    return np.flatnonzero((prev < 0) & (curr >= 0)) + 1
|
||||
|
||||
def extract_wavesets(signal):
    """Split `signal` into wavesets: the spans between successive upward
    zero crossings. Returns (list_of_waveset_arrays, crossing_indices)."""
    zc = find_upward_zero_crossings(signal)
    pieces = [signal[start:stop] for start, stop in zip(zc[:-1], zc[1:])]
    return pieces, zc
|
||||
|
||||
def compute_features(wavesets):
    """Per-waveset features: length in samples and RMS amplitude.

    Returns (lengths, rms) as parallel numpy arrays.
    """
    lengths = np.array([piece.size for piece in wavesets])
    rms = np.array([float(np.sqrt(np.mean(np.square(piece)))) for piece in wavesets])
    return lengths, rms
|
||||
|
||||
def normalize_and_weight(lengths, rms, w):
    """Z-score both feature columns, weight the length column by `w`,
    and stack into an (n, 2) feature matrix for clustering.

    FIX: guards against zero variance (e.g. every waveset the same
    length) — previously a zero std produced NaN/inf features, which
    makes KMeans raise; degenerate columns now fall back to plain
    mean-centering.
    """
    len_std = np.std(lengths)
    rms_std = np.std(rms)
    lengths = (lengths - np.mean(lengths)) / (len_std if len_std > 0 else 1.0)
    rms = (rms - np.mean(rms)) / (rms_std if rms_std > 0 else 1.0)
    return np.stack([lengths * w, rms], axis=1)
|
||||
|
||||
def replace_with_representatives(wavesets, labels, centroids, features):
    """For each cluster pick the member whose feature vector is closest to
    the centroid, then substitute every waveset with its cluster's
    representative, preserving the original order."""
    representatives = []
    for cluster_id in range(centroids.shape[0]):
        members = np.where(labels == cluster_id)[0]
        offsets = np.linalg.norm(features[members] - centroids[cluster_id], axis=1)
        best = members[np.argmin(offsets)]
        representatives.append(wavesets[best])
    return [representatives[label] for label in labels]
|
||||
|
||||
def reconstruct_signal(replaced_sets, zero_crossings, length):
    """Lay the (possibly substituted) wavesets back into a zero-filled
    buffer of `length` samples, starting at the first zero crossing.

    Samples before the first crossing stay silent, and any waveset that
    would overrun the buffer is dropped (the loop stops there).
    FIX: an empty `zero_crossings` previously raised IndexError; it now
    simply returns silence.
    """
    output = np.zeros(length)
    if len(zero_crossings) == 0:
        return output
    cursor = zero_crossings[0]
    for ws in replaced_sets:
        end = cursor + len(ws)
        if end > len(output):
            break
        output[cursor:end] = ws
        cursor = end
    return output
|
||||
|
||||
def waveset_clustering_effect(filepath, output_path, w=5, clusters_per_sec=20):
    """Read a sound file, cluster its wavesets by (length, RMS) with
    k-means, replace each waveset by its cluster's representative, and
    write the reconstructed audio to `output_path`.

    w: weight on the length feature.
    clusters_per_sec: target cluster density; the resulting k is clamped
    to [2, number of wavesets] to avoid trivial or impossible fits.
    """
    audio, sr = sf.read(filepath)
    if audio.ndim > 1:
        audio = audio.mean(axis=1)  # fold to mono

    audio = highpass_filter(audio, sr)
    wavesets, crossings = extract_wavesets(audio)
    lengths, rms = compute_features(wavesets)
    features = normalize_and_weight(lengths, rms, w)

    duration = len(audio) / sr
    n_clusters = max(2, min(int(clusters_per_sec * duration), len(wavesets)))

    model = KMeans(n_clusters=n_clusters, random_state=0).fit(features)
    substituted = replace_with_representatives(
        wavesets, model.labels_, model.cluster_centers_, features)

    sf.write(output_path, reconstruct_signal(substituted, crossings, len(audio)), sr)
|
||||
|
||||
# Example usage: guarded so importing this module no longer triggers
# file I/O on a hard-coded path.
if __name__ == "__main__":
    waveset_clustering_effect("input.wav", "output.wav", w=5, clusters_per_sec=15)
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
{ pkgs ? import <nixpkgs> {} }:

let
  # Python package set used by the waveset clustering script
  py = pkgs.python313Packages;
in
pkgs.mkShell {
  buildInputs = [
    py.numpy
    py.soundfile
    py.scikit-learn
    py.scipy
  ];
}
|
||||
@ -0,0 +1,130 @@
|
||||
(
// One-shot buffer player: mono PlayBuf scaled by amp, panned to stereo;
// frees itself when the buffer has played through (doneAction: 2).
SynthDef(\wavesetPlayer, { |out=0, bufnum, rate=1, amp=0.3, pan=0|
	var src = PlayBuf.ar(1, bufnum, rate * BufRateScale.kr(bufnum), doneAction: 2);
	Out.ar(out, Pan2.ar(src * amp, pan));
}).add;
)
|
||||
|
||||
|
||||
(
// Cluster wavesets by (length, amplitude) with a naive k-means, then
// play the file back one group at a time, substituting each group's
// start by its cluster's representative waveset.
//
// Fixes vs. the previous version:
//  - '^' returns inside Functions (only legal in class methods) replaced
//    with if/else flow and a convergence flag;
//  - Array.new(n) makes an EMPTY array, so `features[i] = ...` threw an
//    index error and `Array.new(size, -1)` was wrong — replaced with
//    collect/fill;
//  - `var ev` was declared after statements inside the while body
//    (a syntax error in sclang) — moved to the top of that function;
//  - `.norm` and `.indexOfMin` are not core sclang methods — replaced
//    with an explicit Euclidean distance and `.minIndex`.
~processWavesets = { |w, minLength=300, weightLength=5, numClusters=10, groupSize=3|
	var lens, amps, validIndices;
	var lensVec, ampsVec, normLens, normAmps, features;
	var kmeans, results, centers, assignments;
	var reps;

	lens = w.lengths;
	amps = w.amps;

	// Indices of wavesets at least minLength samples long
	validIndices = (0..(lens.size - 1)).select { |i| lens[i] >= minLength };

	if (validIndices.isEmpty) {
		"No wavesets found above minLength!".postln;
		nil;
	} {
		lensVec = validIndices.collect { |i| lens[i] };
		ampsVec = validIndices.collect { |i| amps[i] };

		// Normalize both features to 0..1 and weight the length feature
		normLens = lensVec.normalize(0, 1);
		normAmps = ampsVec.normalize(0, 1);
		features = validIndices.size.collect { |i|
			[normLens[i] * weightLength, normAmps[i]]
		};

		// Naive k-means; answers [centers, assignments]
		kmeans = { |data, k, maxIter=100|
			var centers, assignments, changed, iter;

			centers = data.chooseN(k); // random initial centers
			assignments = Array.fill(data.size, -1);
			changed = true;
			iter = 0;

			while { changed and: { iter < maxIter } } {
				changed = false;
				iter = iter + 1;

				// Assignment step: nearest center by Euclidean distance
				data.size.do { |i|
					var distances = centers.collect { |c|
						((c - data[i]).squared.sum).sqrt
					};
					var nearest = distances.minIndex;
					if (assignments[i] != nearest) {
						assignments[i] = nearest;
						changed = true;
					};
				};

				// Update step: move each center to its cluster mean;
				// re-seed empty clusters with a random data point
				if (changed) {
					centers = (0..(k - 1)).collect { |cid|
						var clusterPoints = List.new;
						data.do { |pt, i|
							if (assignments[i] == cid) { clusterPoints.add(pt) };
						};
						if (clusterPoints.isEmpty) {
							data.choose;
						} {
							clusterPoints.sum / clusterPoints.size;
						};
					};
				};
			};
			[centers, assignments]
		};

		// Run clustering
		results = kmeans.(features, numClusters);
		centers = results[0];
		assignments = results[1];

		// Representative per cluster: the member closest to the centroid
		reps = Dictionary.new;
		numClusters.do { |cid|
			var clusterIndices = validIndices.select { |vi, idx| assignments[idx] == cid };
			if (clusterIndices.notEmpty) {
				var clusterFeatures = clusterIndices.collect { |i| features[validIndices.indexOf(i)] };
				var center = centers[cid];
				var distances = clusterFeatures.collect { |f| ((f - center).squared.sum).sqrt };
				reps[cid] = clusterIndices[distances.minIndex];
			};
		};

		// Playback: step through the valid wavesets in groups, playing
		// each group from its cluster's representative
		fork {
			var total = validIndices.size;
			var pos = 0;
			var localGroupSize = groupSize;

			while { pos < total } {
				var clusterID, repIndex, ev;

				// shrink the last group so it does not run past the end
				if (pos + localGroupSize > total) {
					localGroupSize = total - pos;
				};

				clusterID = assignments[pos];
				repIndex = reps[clusterID] ?? validIndices[pos]; // fallback to original

				ev = w.eventFor(repIndex, localGroupSize, 1, 1);
				ev.put(\pan, rrand(-0.5, 0.5));
				ev.play;
				ev.sustain.wait;

				pos = pos + localGroupSize;
			};
		};
	};
};
)
|
||||
|
||||
|
||||
|
||||
// Usage example with your Wavesets instance `w`.
// Tune the parameters to taste.
~processWavesets.(w, minLength: 400, weightLength: 5, numClusters: 12, groupSize: 4);
|
||||
|
||||
@ -0,0 +1,31 @@
|
||||
// bleh

(
// Sketch synth: multiplies a base frequency by a table of tuning values.
// NOTE(review): the tuning values look like cent data (the original
// comment says "53 edo") plus a final 2/1 ratio, yet they are used
// directly as frequency multipliers — 440 * 113.2... gives ultrasonic
// results; confirm the intended conversion (e.g. cents -> ratio).
// NOTE(review): SinOsc.ar over the 9-element array expands to 9
// channels, all written starting at bus 0 — verify this is intended.
// NOTE(review): `outBus` is accepted but never used; output is fixed to bus 0.
SynthDef(\simple, {
	arg amp, tuningRatio, outBus;

	var tuning = [
		113.20755,
		271.69811,
		384.90566,
		543.39623,
		656.60377,
		815.09434,
		928.30189,
		1086.79245,
		2/1
	]; // 53 edo
	var tunedFreq = \freq.ar(440) * tuning * tuningRatio;

	var sig = SinOsc.ar(tunedFreq);
	var env = EnvGen.kr(Env.perc(0.01, 0.2), gate: 1);

	Out.ar(0, sig * env * amp);
}).play;
)
|
||||
|
||||
(
// Sequence the \simple synth: two fixed frequencies (MIDI 60 and 61
// converted by midicps), played once through.
// NOTE(review): \simple above was sent with .play rather than .add, so
// the def may not be registered for Pbind — confirm it is added first.
Pbind(\instrument, \simple,
	\freq, Pseq([60, 61].midicps),
).play
)
|
||||
Loading…
Reference in New Issue