Python中字串的操作

lm_y發表於2017-08-23
學習下str操作函式!

class str(basestring)
 |  str(object='') -> string # 由物件返回一個string
 |  
 |  Return a nice string representation of the object.
 |  If the argument is a string, the return value is the same object.
 |  
 |  Method resolution order:
 |      str
 |      basestring
 |      object

Methods defined here:

 |  capitalize(...)

 |      S.capitalize() -> string  
 |      # 返回一個首字母大寫的字串!
 |      Return a copy of the string S with only its first character
 |      capitalized.                                

>>> test = "abc"

  1. >>> test.capitalize()  
  2. 'Abc'  

 |  center(...) 

 |      S.center(width[, fillchar]) -> string
 |      # 返回一個把原字串移到中間,預設兩邊新增空格的字串,也可以自己指定填充物。
 |      Return S centered in a string of length width. Padding is done using the specified fill character (default is a space)
  1. >>> a = "abcd"  
  2. >>> a.center(8)  
  3. '  abcd  '  
  4. >>> a.center(8,"*")  
  5. '**abcd**'  
  6. >>> a.center(3)  
  7. 'abcd' # 小於字串長度不會變  

 |  count(...)

 |      S.count(sub[, start[, end]]) -> int
 |      # 返回子字串在S中出現的次數,可以指定起始位置

     Return the number of non-overlapping occurrences of substring sub in string S[start:end].  Optional arguments start and end are interpreted as in slice notation.

 |  decode(...)

 |      S.decode([encoding[,errors]]) -> object
 |      # 重要!
 |      Decodes S using the codec registered for encoding. encoding defaults to the default encoding. errors may be given to set a different error handling scheme. Default is 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible values are 'ignore' and 'replace' as well as any other name registered with codecs.register_error that is able to handle UnicodeDecodeErrors.

 |  encode(...)

 |      S.encode([encoding[,errors]]) -> object
 |      # 重要!
 |      Encodes S using the codec registered for encoding. encoding defaults to the default encoding. errors may be given to set a different error handling scheme. Default is 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name registered with codecs.register_error that is able to handle UnicodeEncodeErrors.

 |  endswith(...)

 |      S.endswith(suffix[, start[, end]]) -> bool
 |      # 檢查是否以suffix結尾,可以指定位置。做迴圈的判定條件很有用,免去==!
 |      Return True if S ends with the specified suffix, False otherwise. With optional start, test S beginning at that position. With optional end, stop comparing S at that position. suffix can also be a tuple of strings to try.

 |  expandtabs(...)

 |      S.expandtabs([tabsize]) -> string
 |      # 把字串中的製表符tab用空格展開,補齊到下一個tabsize倍數的列位置(不一定剛好是tabsize個空格),tabsize預設為8個字元
 |      Return a copy of S where all tab characters are expanded using spaces. If tabsize is not given, a tab size of 8 characters is assumed.

 |  find(...)

 |      S.find(sub [,start [,end]]) -> int
 |      # 在S中尋找sub,並可以指定起始位置,返回sub在S中的下標index,找不到返回-1
 |      Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].  Optional arguments start and end are interpreted as in slice notation.  Return -1 on failure.
  1. >>> a = "abcdabcd"  
  2. >>> a.find("b")  
  3. 1  
  4. >>> a.find("b", 2, 7)  
  5. 5  
  6. >>> a.find("b", 3, 7)  
  7. 5  
  8. >>> a.find("b", 6, 7)  
  9. -1  

 |  format(...) 

 |      S.format(*args, **kwargs) -> string
 |      # 字串的格式化輸出!# 例子太多了,官方文件很多:點選開啟連結,我只給出我用過的例子。

  Return a formatted version of S, using substitutions from args and kwargs. The substitutions are identified by braces ('{' and '}').【2014.06.28特別標註,format中要輸出大括號本身時,轉義用的是雙寫大括號{{ }}而不是反斜線\】

 >>> '{:,}'.format(1234567890) # Using the comma as a thousands separator

  1. '1,234,567,890'  
  2. >>> 'Correct answers: {:.2%}'.format(19.5/22)  # Expressing a percentage  
  3. 'Correct answers: 88.64%'  
  4. >>> import datetime  
  1. >>> d = datetime.datetime(2010, 7, 4, 12, 15, 58)  
  2. >>> '{:%Y-%m-%d %H:%M:%S}'.format(d)  
  3. '2010-07-04 12:15:58'  
  1. >>> # 替代center等方法的功能  
  2. >>> '{:<30}'.format('left aligned')  
  3. 'left aligned                  '  
  4. >>> '{:>30}'.format('right aligned')  
  5. '                 right aligned'  
  6. >>> '{:^30}'.format('centered')  
  7. '           centered           '  
  8. >>> '{:*^30}'.format('centered')  # use '*' as a fill char  
  9. '***********centered***********'  
  1. >>> # Accessing arguments by position:  
  2. >>> '{0}, {1}, {2}'.format('a', 'b', 'c')  
  3. 'a, b, c'  
  4. >>> '{}, {}, {}'.format('a', 'b', 'c')  # 2.7+ only  
  5. 'a, b, c'  
  6. >>> '{2}, {1}, {0}'.format('a', 'b', 'c')  
  7. 'c, b, a'  
  8. >>> '{2}, {1}, {0}'.format(*'abc')      # unpacking argument sequence  
  9. 'c, b, a'  
  10. >>> '{0}{1}{0}'.format('abra', 'cad')   # arguments' indices can be repeated  
  11. 'abracadabra'  

更新於:2014.06.28Python format()怎麼輸出{}的問題,請看下面程式碼,很奇葩,學的還很淺顯啊!

  1. >>> print "hi {{}} {{key}}".format(key = "string")  
  2. hi {} {key}  

 |  index(...) 

 |      S.index(sub [,start [,end]]) -> int
 |      # 類似find尋找下標,找不到會報錯,find找不到返回-1
 |      Like S.find() but raise ValueError when the substring is not found.

 |  isalnum(...)

 |      S.isalnum() -> bool
 |      # 判斷S中是否全為數字或者字母【並至少有一個字元】,是則返回True。有中文或者符號或者沒有字元返回False
 |      Return True if all characters in S are alphanumeric and there is at least one character in S, False otherwise.
  1. >>> a = "adad1122"  
  2. >>> a.isalnum()  
  3. True  
  4. >>> a = "3123dddaw''[]"  
  5. >>> a.isalnum()  
  6. False  
  7. >>> a = "你好hello"  
  8. >>> a.isalnum()  
  9. False  

 |  isalpha(...) 

 |      S.isalpha() -> bool
 |      # 判斷是否全為字母【並至少有一個字元】

 |      Return True if all characters in S are alphabetic  and there is at least one character in S, False otherwise.

 |  isdigit(...)

 |      S.isdigit() -> bool
 |      #  判斷是否全為數字【並至少有一個字元】
 |      Return True if all characters in S are digits and there is at least one character in S, False otherwise.

 |  islower(...)

 |      S.islower() -> bool
 |      # 判斷字母是否全為小寫(有數字不影響)【並至少有一個字元】
 |      Return True if all cased characters in S are lowercase and there is at least one cased character in S, False otherwise.

 |  isspace(...)

 |      S.isspace() -> bool
 |      # 判斷是否全為空白字元【並至少有一個字元】
 |      Return True if all characters in S are whitespace and there is at least one character in S, False otherwise.

 |  istitle(...)

 |      S.istitle() -> bool
 |      # 判斷S中每個單詞是否首字母大寫,並且後面字母都為小寫!【並至少有一個字元】# 很多blog無限複製的都是錯的。實踐很重要!
 |      Return True if S is a titlecased string and there is at least one character in S, i.e. uppercase characters may only follow uncased characters and lowercase characters only cased ones. Return False otherwise.
  1. >>> a = "Abc"  
  2. >>> a.istitle()  
  3. True  
  4. >>> a = "aBc"  
  5. >>> a.istitle()  
  6. False  
  7. >>> a = "AbC"  
  8. >>> a.istitle()  
  9. False  
  1. >>> a = "Abc Cbc"  
  2. >>> a.istitle()  
  3. True  
  4. >>> a = "Abc cbc"  
  5. >>> a.istitle()  
  6. False  

 |  isupper(...)

 |      S.isupper() -> bool
 |      # 判斷字母是否是全大寫(有數字不影響)【並至少有一個字元】 
 |      Return True if all cased characters in S are uppercase and there is at least one cased character in S, False otherwise.

 |  join(...)

 |      S.join(iterable) -> string
 |      # 經常使用!把迭代器中的內容用S作為連線符連線起來!迭代器中內容必須也為字串(以前沒留意)!
 |      Return a string which is the concatenation of the strings in the iterable.  The separator between elements is S.

 |  ljust(...) 

 |      S.ljust(width[, fillchar]) -> string
 |      #輸出width個字元,S左對齊,不足部分用fillchar填充,預設的為空格。 
 |      Return S left-justified in a string of length width. Padding is done using the specified fill character (default is a space).

 |  lower(...)

 |      S.lower() -> string
 |      # 返回一個全部變為小寫的字串。
 |      Return a copy of the string S converted to lowercase.

 |  lstrip(...)

 |      S.lstrip([chars]) -> string or unicode
 |      # 去掉字串左邊的空格或者刪除掉指定的chars(如果有的話)。
 |      Return a copy of the string S with leading whitespace removed. If chars is given and not None, remove characters in chars instead. If chars is unicode, S will be converted to unicode before stripping

 |  partition(...)

 |      S.partition(sep) -> (head, sep, tail)
 |      # 接受一個字串引數,並返回一個3個元素的 tuple 物件。如果sep沒出現在母串S中,返回值是 (S, ‘’, ‘’),即母串本身加上兩個空字串;否則,返回值的第一個元素是 sep 左端的部分,第二個元素是 sep 自身,第三個元素是 sep 右端的部分。
 |      Search for the separator sep in S, and return the part before it, the separator itself, and the part after it.  If the separator is not found, return S and two empty strings.

 |  replace(...)

 |      S.replace(old, new[, count]) -> string
 |      # 替換!沒有給定count時,預設替換所有字串,如果給定了count,則只替換指定count個!
 |      Return a copy of string S with all occurrences of substring old replaced by new.  If the optional argument count is given, only the first count occurrences are replaced.
  1. >>> a = "  213  213 1312  "  
  2. >>> a.replace(" ", "")  
  3. '2132131312'  
  4. >>> a.replace(" ", "", 3)  
  5. '213 213 1312  '  
  6. >>> a.replace(" ", "", 5)  
  7. '2132131312  '  
  8. >>> a  
  9. '  213  213 1312  '  

【2014.05.22更新】

初學時候一直覺得str方法操作不怎麼合理。因為a.replace()操作過後a是不會變的。剛開始使用的時候很不習慣。現在想想這麼設計是很合理的!

為什麼呢?在學習tuple和dict的時候大家會學習不可變的物件!其中就會說到str。python這樣設計的目的就是保證a不會改變!!!保證不可變物件自身永不可變。

 |  rfind(...)

 |      S.rfind(sub [,start [,end]]) -> int
 |      # 查詢,返回最大的index,也可以指定位置(切片中)查詢,找不到返回-1
 |      Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].  Optional arguments start and end are interpreted as in slice notation.
 |      
 |      Return -1 on failure.

 |  rindex(...)

 |      S.rindex(sub [,start [,end]]) -> int
 |      # 同rfind,沒找到報錯。
 |      Like S.rfind() but raise ValueError when the substring is not found.

 |  rjust(...)

 |      S.rjust(width[, fillchar]) -> string
 |      #輸出width個字元,S右對齊,不足部分用fillchar填充,預設的為空格。
 |      Return S right-justified in a string of length width. Padding is done using the specified fill character (default is a space)

 |  rpartition(...)

 |      S.rpartition(sep) -> (head, sep, tail)
 |      
 |      Search for the separator sep in S, starting at the end of S, and return the part before it, the separator itself, and the part after it.  If the separator is not found, return two empty strings and S.

 |  rsplit(...)

 |      S.rsplit([sep [,maxsplit]]) -> list of strings
 |      # 和split()相同,只不過從尾部開始分割
 |      Return a list of the words in the string S, using sep as the delimiter string, starting at the end of the string and working  to the front.  If maxsplit is given, at most maxsplit splits are done. If sep is not specified or is None, any whitespace string is a separator.

 |  rstrip(...)

 |      S.rstrip([chars]) -> string or unicode
 |      # 去掉字串s右邊的空格或者指定的chars
 |      Return a copy of the string S with trailing whitespace removed. If chars is given and not None, remove characters in chars instead. If chars is unicode, S will be converted to unicode before stripping

 |  split(...)

 |      S.split([sep [,maxsplit]]) -> list of strings
 |      # 經常使用!用sep作為標記把S切分為list(sep在S中),和join()配合使用。
 |      Return a list of the words in the string S, using sep as the delimiter string.  If maxsplit is given, at most maxsplit splits are done. If sep is not specified or is None, any whitespace string is a separator and empty strings are removed from the result.

 |  splitlines(...)

 |      S.splitlines(keepends=False) -> list of strings
 |      
 |      Return a list of the lines in S, breaking at line boundaries. Line breaks are not included in the resulting list unless keepends is given and true.

 |  startswith(...)

 |      S.startswith(prefix[, start[, end]]) -> bool
 |      # 判斷s是否以prefix開頭,s的切片字串是否以prefix開頭
 |      Return True if S starts with the specified prefix, False otherwise. With optional start, test S beginning at that position. With optional end, stop comparing S at that position. prefix can also be a tuple of strings to try.

 |  strip(...)

 |      S.strip([chars]) -> string or unicode
 |      # 去掉字串s兩端的空格或者指定的chars

 |      Return a copy of the string S with leading and trailing whitespace removed. If chars is given and not None, remove characters in chars instead. If chars is unicode, S will be converted to unicode before stripping

【更新於2014.07.31:去掉指定的字串不要使用replace("", ""),strip快!】

 |  swapcase(...)

 |      S.swapcase() -> string
 |      # 大小寫互換
 |      Return a copy of the string S with uppercase characters converted to lowercase and vice versa.

 |  title(...)

 |      S.title() -> string
 |      # 返回一個每個單詞首字母大寫,其他小寫的字串。
 |      Return a titlecased version of S, i.e. words start with uppercase characters, all remaining cased characters have lowercase.

 |  translate(...)

 |      S.translate(table [,deletechars]) -> string
 |      
 |      Return a copy of the string S, where all characters occurring in the optional argument deletechars are removed, and the remaining characters have been mapped through the given  translation table, which must be a string of length 256 or None. If the table argument is None, no translation is applied and the operation simply removes the characters in deletechars.

 |  upper(...)

 |      S.upper() -> string
 |      # 小寫字母變為大寫
 |      Return a copy of the string S converted to uppercase.

 |  zfill(...)

 |      S.zfill(width) -> string
 |      # zfill()左側以字元0進行填充,在輸出數值時常用!
 |      Pad a numeric string S with zeros on the left, to fill a field of the specified width.  The string S is never truncated.
 |  
 |  ---------------------------------------------------------------------- 

str切片操作:

str[0:5] 擷取第一位到第五位的字元
str[:] 擷取字串的全部字元
str[4:] 擷取第五個字元到結尾
str[:-3] 擷取從頭開始到倒數第三個字元之前
str[2] 擷取第三個字元
str[::-1] 創造一個與原字串順序相反的字串 字串的反轉


decode&encode

暫時不更新
不定期更新,轉載請帶上本文地址:http://blog.csdn.net/zhanh1218/article/details/21826239

相關文章