JS實現全半形轉換和去空格

為啥強制暱稱 發表於 2013-12-07

最近在寫開題報告,發現 PDF 中的文字複製出來會各種凌亂:標點全半形不定、各種空格莫名其妙地多,太影響複製貼上了。

用 JS 寫了個解決問題的辦法,基本上就是用正規表示式做替換。為了能方便地加入新的標點轉換,在結構上做了一些優化。

<html>
<!-- Remove all whitespace from the input and convert punctuation to the correct full-/half-width form -->
<head>
	<meta charset="UTF-8">
	<script type="text/javascript"
	src="http://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"></script>
	<script type="text/javascript">

	// Once the DOM is ready, attach the click handlers to the two buttons.
	$(function(){
		var formatButton = $('#format');
		var clearButton = $('#clear');
		formatButton.click(format);
		clearButton.click(clear);
	});

	/**
	 * One replacement rule, stored as a two-slot, array-like record:
	 * slot REG holds the pattern and slot REP holds the replacement that
	 * format() passes to String.prototype.replace.
	 *
	 * @param reg pattern to search for
	 * @param rep replacement (string or callback)
	 */
	function Strategy(reg, rep){
		var patternSlot = this.REG;
		var replacementSlot = this.REP;
		this[patternSlot] = reg;
		this[replacementSlot] = rep;
	}
	// Slot indices, shared by every instance through the prototype.
	Strategy.prototype.REP = 1;
	Strategy.prototype.REG = 0;

	// Replacement helpers.

	/**
	 * Build the replacement text for one match: the preceding character is
	 * kept as-is and the punctuation mark is swapped for its mapped value.
	 *
	 * @param p1 text captured before the punctuation mark
	 * @param p2 the matched punctuation mark, used as the lookup key
	 * @param mapping lookup table from punctuation mark to replacement
	 * @returns p1 followed by mapping[p2]
	 */
	function change(p1, p2, mapping){
		var replacement = mapping[p2];
		return p1 + replacement;
	}

	/**
	 * Build the global pattern "(word)([keys])": one character matched by
	 * `word`, followed by any one character that is a key of `mapping`.
	 *
	 * @param {string} word regex source for the preceding character; it is
	 *        inserted into the pattern unescaped
	 * @param {Object} mapping object whose own property names become the
	 *        members of the character class
	 * @returns {RegExp} a new RegExp with the `g` flag
	 */
	function getRegOf(word, mapping){
		var members = '';
		for (var key in mapping){
			// Ignore anything inherited through the prototype chain.
			if (!Object.prototype.hasOwnProperty.call(mapping, key)){
				continue;
			}
			// Escape only what is special inside [...]. Prefixing every key
			// with "\" would turn a key such as "d" or "w" into the class
			// \d / \w instead of the literal character.
			members += key.replace(/[\\\]\[\^\-]/g, '\\$&');
		}
		return new RegExp('(' + word + ')([' + members + '])', 'g');
	}

	// Half-width (ASCII) punctuation -> full-width (CJK) punctuation.

	/**
	 * Replacement callback for String.prototype.replace, used together with
	 * the pattern from en2cnChange.prototype.mapping.reg(). Arguments are
	 * the ones replace() supplies: [0] whole match, [1] the preceding
	 * character, [2] the ASCII punctuation mark.
	 *
	 * @returns the preceding character followed by the full-width mark
	 */
	function en2cnChange(){
		var args = arguments;
		return change(args[1], args[2], en2cnChange.prototype.mapping.mapping);
	}

	en2cnChange.prototype.mapping = {
		// ASCII mark -> full-width counterpart. The values are written as
		// \u escapes so that width-folding of the source text cannot turn
		// them into the same ASCII characters as the keys, which would make
		// the conversion a no-op.
		mapping : {
			',': '\uFF0C', // FULLWIDTH COMMA
			'.': '\u3002', // IDEOGRAPHIC FULL STOP
			';': '\uFF1B', // FULLWIDTH SEMICOLON
			'!': '\uFF01'  // FULLWIDTH EXCLAMATION MARK
		},
		// Lazily builds and caches the pattern "(\W)([,.;!])": an ASCII mark
		// preceded by a non-word character. \w only covers [A-Za-z0-9_], so
		// a preceding CJK character counts as non-word.
		reg: function(){
			if(this._reg === undefined){
				// built once, then reused
				this._reg = getRegOf('\\W', this.mapping);
			}
			return this._reg;
		}
	};
	// Full-width (CJK) punctuation -> half-width (ASCII) punctuation.

	/**
	 * Replacement callback for String.prototype.replace, used together with
	 * the pattern from cn2enChange.prototype.mapping.reg(). Arguments are
	 * the ones replace() supplies: [0] whole match, [1] the preceding
	 * character, [2] the full-width punctuation mark.
	 *
	 * @returns the preceding character followed by the ASCII mark
	 */
	function cn2enChange(){
		var args = arguments;
		return change(args[1], args[2], cn2enChange.prototype.mapping.mapping);
	}

	cn2enChange.prototype.mapping = {
		// Full-width mark -> ASCII counterpart. The keys are written as \u
		// escapes so that width-folding of the source text cannot turn them
		// into ASCII, which would leave the full-width marks unmatched.
		mapping : {
			'\uFF0C': ',', // FULLWIDTH COMMA
			'\u3002': '.', // IDEOGRAPHIC FULL STOP
			'\uFF1B': ';', // FULLWIDTH SEMICOLON
			'\uFF01': '!'  // FULLWIDTH EXCLAMATION MARK
		},
		// Lazily builds and caches the pattern "(\w)([full-width marks])":
		// a full-width mark preceded by an ASCII word character.
		reg: function(){
			if(this._reg === undefined){
				// built once, then reused
				this._reg = getRegOf('\\w', this.mapping);
			}
			return this._reg;
		}
	};

	// Event handlers.

	/**
	 * Click handler for #format: reads the text of #input, applies every
	 * rule in format.prototype.strategies in order, and writes the result
	 * to #output.
	 */
	function format(){
		var reg = Strategy.prototype.REG;
		var rep = Strategy.prototype.REP;
		var strategies = format.prototype.strategies;
		var str = $('#input').val();
		// Indexed loop instead of for...in: for...in on an array would also
		// visit any enumerable property added to Array.prototype.
		for(var i = 0; i < strategies.length; i++){
			var strategy = strategies[i];
			str = str.replace(strategy[reg], strategy[rep]);
		}
		$('#output').val(str);
	}
	// Rules run in this order: ASCII -> full-width punctuation,
	// full-width -> ASCII punctuation, then removal of all whitespace.
	format.prototype.strategies = [
		new Strategy(en2cnChange.prototype.mapping.reg(), en2cnChange),
		new Strategy(cn2enChange.prototype.mapping.reg(), cn2enChange),
		new Strategy(/\s/g, function(){return ''})// white space
	];

	/**
	 * Click handler for #clear: empties every <textarea> on the page.
	 */
	function clear(){
		// .val('') sets the value on each element of the matched set.
		$('textarea').val('');
	}
	</script>
	<style type="text/css">
	textarea{
		display: inline-block;
		width: 45%;
		height:80%;
		margin: 1em;
	}
	</style>
</head>
	<textarea id="input" placeholder="input"></textarea>
	<textarea id="output" placeholder="output"></textarea>
	<br/>
	<button id="format">format</button>
	<button id="clear">clear</button>
</html>



相關文章