Tampaknya tidak ada jawaban saat ini yang benar-benar akan menghapus 100% spasi putih dari awal dan akhir string.
Seperti disebutkan dalam posting lain, defaultnya TRIM
hanya menghapus spasi - bukan tab, formfeed, dll. Kombinasi dari TRIM
menentukan karakter spasi putih lainnya dapat memberikan peningkatan terbatas, mis TRIM(BOTH '\r' FROM TRIM(BOTH '\n' FROM TRIM(BOTH '\f' FROM TRIM(BOTH '\t' FROM TRIM(txt)))))
. Tetapi masalah dengan pendekatan ini hanya satu karakter yang dapat ditentukan untuk yang khusus TRIM
dan karakter tersebut hanya dihapus dari awal dan akhir. Jadi jika string yang dipangkas adalah sesuatu seperti \t \t \t \t
(yaitu spasi alternatif dan karakter tab), lebih banyak TRIM
s akan diperlukan - dan dalam kasus umum ini dapat berlangsung tanpa batas.
Untuk solusi yang ringan, harus dimungkinkan untuk menulis Fungsi Didefinisikan Pengguna (UDF) sederhana untuk melakukan pekerjaan dengan mengulangi karakter pada awal dan akhir string. Tetapi saya tidak akan melakukan itu ... karena saya telah menulis sebuah pengganti ekspresi reguler yang lebih berat yang juga dapat melakukan pekerjaan - dan mungkin berguna karena alasan lain, seperti dijelaskan dalam posting blog ini .
Demo
Rextester demo online . Secara khusus, baris terakhir menunjukkan metode lain gagal tetapi metode ekspresi reguler berhasil.
Fungsi :
-- ------------------------------------------------------------------------------------
-- USAGE
-- ------------------------------------------------------------------------------------
-- SELECT reg_replace(<subject>,
-- <pattern>,
-- <replacement>,
-- <greedy>,
-- <minMatchLen>,
-- <maxMatchLen>);
-- where:
-- <subject> is the string to look in for doing the replacements
-- <pattern> is the regular expression to match against
-- <replacement> is the replacement string
-- <greedy> is TRUE for greedy matching or FALSE for non-greedy matching
-- <minMatchLen> specifies the minimum match length
-- <maxMatchLen> specifies the maximum match length
-- (minMatchLen and maxMatchLen are used to improve efficiency but are
-- optional and can be set to 0 or NULL if not known/required)
-- Example:
-- SELECT reg_replace(txt, '^[Tt][^ ]* ', 'a', TRUE, 2, 0) FROM tbl;
DROP FUNCTION IF EXISTS reg_replace;
CREATE FUNCTION reg_replace(subject VARCHAR(21845), pattern VARCHAR(21845),
replacement VARCHAR(21845), greedy BOOLEAN, minMatchLen INT, maxMatchLen INT)
RETURNS VARCHAR(21845) DETERMINISTIC BEGIN
DECLARE result, subStr, usePattern VARCHAR(21845);
DECLARE startPos, prevStartPos, startInc, len, lenInc INT;
IF subject REGEXP pattern THEN
SET result = '';
-- Sanitize input parameter values
SET minMatchLen = IF(minMatchLen < 1, 1, minMatchLen);
SET maxMatchLen = IF(maxMatchLen < 1 OR maxMatchLen > CHAR_LENGTH(subject),
CHAR_LENGTH(subject), maxMatchLen);
-- Set the pattern to use to match an entire string rather than part of a string
SET usePattern = IF (LEFT(pattern, 1) = '^', pattern, CONCAT('^', pattern));
SET usePattern = IF (RIGHT(pattern, 1) = '$', usePattern, CONCAT(usePattern, '$'));
-- Set start position to 1 if pattern starts with ^ or doesn't end with $.
IF LEFT(pattern, 1) = '^' OR RIGHT(pattern, 1) <> '$' THEN
SET startPos = 1, startInc = 1;
-- Otherwise (i.e. pattern ends with $ but doesn't start with ^): Set start position
-- to the min or max match length from the end (depending on "greedy" flag).
ELSEIF greedy THEN
SET startPos = CHAR_LENGTH(subject) - maxMatchLen + 1, startInc = 1;
ELSE
SET startPos = CHAR_LENGTH(subject) - minMatchLen + 1, startInc = -1;
END IF;
WHILE startPos >= 1 AND startPos <= CHAR_LENGTH(subject)
AND startPos + minMatchLen - 1 <= CHAR_LENGTH(subject)
AND !(LEFT(pattern, 1) = '^' AND startPos <> 1)
AND !(RIGHT(pattern, 1) = '$'
AND startPos + maxMatchLen - 1 < CHAR_LENGTH(subject)) DO
-- Set start length to maximum if matching greedily or pattern ends with $.
-- Otherwise set starting length to the minimum match length.
IF greedy OR RIGHT(pattern, 1) = '$' THEN
SET len = LEAST(CHAR_LENGTH(subject) - startPos + 1, maxMatchLen), lenInc = -1;
ELSE
SET len = minMatchLen, lenInc = 1;
END IF;
SET prevStartPos = startPos;
lenLoop: WHILE len >= 1 AND len <= maxMatchLen
AND startPos + len - 1 <= CHAR_LENGTH(subject)
AND !(RIGHT(pattern, 1) = '$'
AND startPos + len - 1 <> CHAR_LENGTH(subject)) DO
SET subStr = SUBSTRING(subject, startPos, len);
IF subStr REGEXP usePattern THEN
SET result = IF(startInc = 1,
CONCAT(result, replacement), CONCAT(replacement, result));
SET startPos = startPos + startInc * len;
LEAVE lenLoop;
END IF;
SET len = len + lenInc;
END WHILE;
IF (startPos = prevStartPos) THEN
SET result = IF(startInc = 1, CONCAT(result, SUBSTRING(subject, startPos, 1)),
CONCAT(SUBSTRING(subject, startPos, 1), result));
SET startPos = startPos + startInc;
END IF;
END WHILE;
IF startInc = 1 AND startPos <= CHAR_LENGTH(subject) THEN
SET result = CONCAT(result, RIGHT(subject, CHAR_LENGTH(subject) + 1 - startPos));
ELSEIF startInc = -1 AND startPos >= 1 THEN
SET result = CONCAT(LEFT(subject, startPos), result);
END IF;
ELSE
SET result = subject;
END IF;
RETURN result;
END;
DROP FUNCTION IF EXISTS format_result;
CREATE FUNCTION format_result(result VARCHAR(21845))
RETURNS VARCHAR(21845) DETERMINISTIC BEGIN
RETURN CONCAT(CONCAT('|', REPLACE(REPLACE(REPLACE(REPLACE(result, '\t', '\\t'), CHAR(12), '\\f'), '\r', '\\r'), '\n', '\\n')), '|');
END;
DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl
AS
SELECT 'Afghanistan' AS txt
UNION ALL
SELECT ' AF' AS txt
UNION ALL
SELECT ' Cayman Islands ' AS txt
UNION ALL
SELECT CONCAT(CONCAT(CONCAT('\t \t ', CHAR(12)), ' \r\n\t British Virgin Islands \t \t ', CHAR(12)), ' \r\n') AS txt;
SELECT format_result(txt) AS txt,
format_result(TRIM(txt)) AS trim,
format_result(TRIM(BOTH '\r' FROM TRIM(BOTH '\n' FROM TRIM(BOTH '\f' FROM TRIM(BOTH '\t' FROM TRIM(txt))))))
AS `trim spaces, tabs, formfeeds and line endings`,
format_result(reg_replace(reg_replace(txt, '^[[:space:]]+', '', TRUE, 1, 0), '[[:space:]]+$', '', TRUE, 1, 0))
AS `reg_replace`
FROM tbl;
Pemakaian:
SELECT reg_replace(
reg_replace(txt,
'^[[:space:]]+',
'',
TRUE,
1,
0),
'[[:space:]]+$',
'',
TRUE,
1,
0) AS `trimmed txt`
FROM tbl;