In [1]:
import webbrowser
webbrowser.open('https://www.youtube.com/watch?v=3Dg006VuOMk&list=RD3Dg006VuOMk&start_radio=1')
Out[1]:
True
In [2]:
!pip3 install nltk
Collecting nltk Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB) Requirement already satisfied: click in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (8.1.7) Requirement already satisfied: joblib in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (1.4.2) Requirement already satisfied: regex>=2021.8.3 in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (2024.7.24) Requirement already satisfied: tqdm in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (4.66.5) Requirement already satisfied: colorama in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from click->nltk) (0.4.6) Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB) Installing collected packages: nltk Successfully installed nltk-3.9.1
In [3]:
import nltk
nltk.download()
showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml
Out[3]:
True
In [9]:
from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)
['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']
In [10]:
print(sample_sentence.split())
['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']
In [12]:
sample_sentence2="Hi I am Learning NLP with Mr. Karan Arora. My age is 27"
b=sent_tokenize(sample_sentence2)
print(b)
['Hi I am Learning NLP with Mr. Karan Arora.', 'My age is 27']
In [13]:
EXAMPLE_TEXT = "Hello Mr. Karan Arora, how are you doing today? The weather is great, and Python is awesome. The sky is blue. You shouldn't eat chicken."
print(sent_tokenize(EXAMPLE_TEXT))
['Hello Mr. Karan Arora, how are you doing today?', 'The weather is great, and Python is awesome.', 'The sky is blue.', "You shouldn't eat chicken."]
In [14]:
print(word_tokenize(EXAMPLE_TEXT))
['Hello', 'Mr.', 'Karan', 'Arora', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', ',', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'blue', '.', 'You', 'should', "n't", 'eat', 'chicken', '.']
In [15]:
from nltk.tokenize import word_tokenize
from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import WordPunctTokenizer
In [16]:
tok2=TreebankWordTokenizer() #use short form
tok3=WordPunctTokenizer()
sent="Hi my name is Karan"
print(word_tokenize(sent))
print(tok2.tokenize(sent)) #or TreebankWordTokenizer().tokenize(sent)
print(tok3.tokenize(sent))
['Hi', 'my', 'name', 'is', 'Karan'] ['Hi', 'my', 'name', 'is', 'Karan'] ['Hi', 'my', 'name', 'is', 'Karan']
In [17]:
sent2="I won't let you bring wine"
print(word_tokenize(sent2))
print(tok2.tokenize(sent2))
print(tok3.tokenize(sent2))
['I', 'wo', "n't", 'let', 'you', 'bring', 'wine'] ['I', 'wo', "n't", 'let', 'you', 'bring', 'wine'] ['I', 'won', "'", 't', 'let', 'you', 'bring', 'wine']
In [19]:
from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)
['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']
In [20]:
from nltk.corpus import stopwords
stop_words=set(stopwords.words("english"))
stop_words
Out[20]:
{'a',
'about',
'above',
'after',
'again',
'against',
'ain',
'all',
'am',
'an',
'and',
'any',
'are',
'aren',
"aren't",
'as',
'at',
'be',
'because',
'been',
'before',
'being',
'below',
'between',
'both',
'but',
'by',
'can',
'couldn',
"couldn't",
'd',
'did',
'didn',
"didn't",
'do',
'does',
'doesn',
"doesn't",
'doing',
'don',
"don't",
'down',
'during',
'each',
'few',
'for',
'from',
'further',
'had',
'hadn',
"hadn't",
'has',
'hasn',
"hasn't",
'have',
'haven',
"haven't",
'having',
'he',
'her',
'here',
'hers',
'herself',
'him',
'himself',
'his',
'how',
'i',
'if',
'in',
'into',
'is',
'isn',
"isn't",
'it',
"it's",
'its',
'itself',
'just',
'll',
'm',
'ma',
'me',
'mightn',
"mightn't",
'more',
'most',
'mustn',
"mustn't",
'my',
'myself',
'needn',
"needn't",
'no',
'nor',
'not',
'now',
'o',
'of',
'off',
'on',
'once',
'only',
'or',
'other',
'our',
'ours',
'ourselves',
'out',
'over',
'own',
're',
's',
'same',
'shan',
"shan't",
'she',
"she's",
'should',
"should've",
'shouldn',
"shouldn't",
'so',
'some',
'such',
't',
'than',
'that',
"that'll",
'the',
'their',
'theirs',
'them',
'themselves',
'then',
'there',
'these',
'they',
'this',
'those',
'through',
'to',
'too',
'under',
'until',
'up',
've',
'very',
'was',
'wasn',
"wasn't",
'we',
'were',
'weren',
"weren't",
'what',
'when',
'where',
'which',
'while',
'who',
'whom',
'why',
'will',
'with',
'won',
"won't",
'wouldn',
"wouldn't",
'y',
'you',
"you'd",
"you'll",
"you're",
"you've",
'your',
'yours',
'yourself',
'yourselves'}
In [22]:
for word in a:
if word not in stop_words:
print(word,end=" ")
Hi I Learning NLP Itronix Solutions
In [26]:
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
port=PorterStemmer()
words = ["program", "programs", "programer", "programing", "programers",'beautiful']
for w in words:
print(port.stem(w))
program program program program program beauti
In [27]:
from nltk.stem import LancasterStemmer
lstemmer=LancasterStemmer()
lstemmer.stem('beautiful')
Out[27]:
'beauty'
In [28]:
from nltk.stem import WordNetLemmatizer
lzr = WordNetLemmatizer()
print(lzr.lemmatize('working',pos='v')) #pos : part of speech
print("better :", lzr.lemmatize("better", pos ="a"))
work better : good
In [ ]:
Machine Learning Tutorials, Courses and Certifications