Skip to content

Commit 53121d8

Browse files
authored
Create matrix.sh
1 parent 33a61e5 commit 53121d8

1 file changed

Lines changed: 41 additions & 0 deletions

File tree

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# ... (前面 seqkit fx2tab 部分保持不变) ...
2+
3+
echo "Calculating Adaptive Thresholds (Pure Bash/Awk)..."
4+
5+
# 1. 计算总行数 (Total Reads)
6+
TOTAL_READS=$(wc -l < read_metrics.tsv)
7+
echo "Total Reads: ${TOTAL_READS}"
8+
9+
# 2. 定义计算函数:输入列号(2=Len, 3=Qual),输入百分比(0.05=95%留存)
10+
# 逻辑:排序 -> 计算目标行号 -> 提取该行数值
11+
get_quantile() {
12+
col=$1
13+
p=$2
14+
# 计算目标行号 (Total * P),取整
15+
target_line=$(awk -v t="${TOTAL_READS}" -v p="${p}" 'BEGIN {printf "%.0f", t * p}')
16+
# 如果算出来是0,强制设为1
17+
[[ "$target_line" -eq 0 ]] && target_line=1
18+
19+
# 核心:对指定列排序,取第 target_line 行的数值
20+
# sort -k${col},${col}n : 按指定列数值排序 (从小到大)
21+
# head -n : 取前n行
22+
# tail -n 1 : 取最后一行(即第n行)
23+
# cut -f : 提取该列
24+
sort -t$'\t' -k${col},${col}n read_metrics.tsv | head -n "${target_line}" | tail -n 1 | cut -f${col}
25+
}
26+
27+
# 3. 计算 Length 阈值 (第2列)
28+
# 5%分位数 = 丢弃最烂的5% = 留存95%
29+
L95=$(get_quantile 2 0.05)
30+
L80=$(get_quantile 2 0.20)
31+
L60=$(get_quantile 2 0.40)
32+
33+
# 4. 计算 Quality 阈值 (第3列)
34+
Q95=$(get_quantile 3 0.05)
35+
Q80=$(get_quantile 3 0.20)
36+
Q60=$(get_quantile 3 0.40)
37+
38+
echo "Adaptive L-Grids (95%/80%/60%): ${L95}, ${L80}, ${L60}"
39+
echo "Adaptive Q-Grids (95%/80%/60%): ${Q95}, ${Q80}, ${Q60}"
40+
41+
# ... (后面生成任务部分保持不变) ...

0 commit comments

Comments
 (0)