master
Anne Lorenz 2018-12-19 13:04:19 +01:00
parent 8b36686d0c
commit ee911377bf
2 changed files with 8878 additions and 8915 deletions

File diff suppressed because one or more lines are too long

View File

@ -249,75 +249,20 @@
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6c56c792f25843799d540a887e3639f8",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"interactive(children=(BoundedIntText(value=1, description='Round no.:', max=1000, min=1), Output()), _dom_classes=('widget-interact',))"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def n(x):\n",
" ''' this function is executed when the current round number is entered\n",
" '''\n",
" return x\n",
"\n",
"# create widget for current iteration/round number\n",
"w = interactive(n, x = widgets.BoundedIntText(value=1, min=1, max=1000, step=1, description='Round no.:'))\n",
"display(w)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 69,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Continue with iteration number: 1\n"
"Last iteration number: 2\n",
"\n",
"Number of labeled articles: 20\n",
"Number of unlabeled articles: 9980\n"
]
}
],
"source": [
"# save round number\n",
"m = w.result\n",
"\n",
"print('Continue with iteration number: {}'.format(m))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# read current data set from csv\n",
"df = pd.read_csv('../data/interactive_labeling.csv',\n",
@ -325,33 +270,40 @@
" usecols=range(1,12), # drop first column 'unnamed'\n",
" encoding='utf-8',\n",
" quoting=csv.QUOTE_NONNUMERIC,\n",
" quotechar='\\'')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Last round (no. 0):\n",
"Number of labeled articles: 0\n",
"Number of unlabeled articles: 10000\n"
]
}
],
"source": [
"print('Last round (no. {}):'.format(m-1))\n",
" quotechar='\\'')\n",
"\n",
"# find current iteration/round number\n",
"m = int(df['Round'].max())\n",
"\n",
"print('Last iteration number: {}'.format(m))\n",
"print()\n",
"print('Number of labeled articles: {}'.format(len(df.loc[df['Label'] != -1])))\n",
"print('Number of unlabeled articles: {}'.format(len(df.loc[df['Label'] == -1])))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Continue with iteration number: 3\n"
]
}
],
"source": [
"# increment round number\n",
"m += 1\n",
"\n",
"print('Continue with iteration number: {}'.format(m))"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"scrolled": true
},
@ -360,7 +312,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{}\n"
"{'United Parcel Service': 3, 'Insurer Aviva': 3, 'JP Morgan': 3, 'HSBC': 3, 'Natural Resources': 3, 'Horizon': 3, 'TransCanada Corps': 3, 'Energy East': 3, 'Gibson Energy': 3, 'Cenovus Energy': 3, 'Turner Mason &': 3, 'National Energy Board': 3, 'Airlines Lufthansa': 3, 'Etihad': 3, 'Lufthansa': 3, 'Etihad Airways': 3, 'Abu Dhabi': 3, 'Alitalia': 3, 'Qatar Airways': 3, 'IAG': 3, 'Credit Suisse': 5, 'Singapore Airlines': 3, 'Air China': 3, 'All Nippon Airways': 3, 'Brussels Airlines': 3, 'Barclays': 5, 'Societe Generale': 3, 'Nomura': 3, 'NATO': 3, 'MSCI': 3, 'Financial Services Agency': 3, 'Shiroyama Consulting Co..': 3, 'FSA': 3, 'CNBC': 3, 'Snap': 3, 'CII': 3, 'Supremex': 3, 'Triumph': 3, 'Aerospace Industries': 3, 'Korea Aerospace Industries': 3, 'Reynolds': 3, 'British American Tobacco': 3, 'NGP': 3, 'Philip Morris International': 3, 'Imperial Brands': 3, 'Philip Morris': 3, 'FDA': 3, 'Jefferies': 3, 'Japan Tobacco': 3, 'Mighty': 3, 'British American Tobacco vs Philip Morris': 3, 'GST': 2, 'Indias': 2, 'Lee Cheong Gold Dealers': 2, 'MILAN': 2, 'Roche': 2, 'Banco Santander': 2, 'Richemont': 2, 'Corpus Christi': 2, 'Buckeye Partners': 2, 'Buckeye Texas Processing': 2, 'Amazon': 2, 'ASOS': 2, 'Fox Jessica Toonkel': 2, 'Fox TV': 2, 'Walt Disney': 2, 'News': 2, 'Fox News Channel': 2, 'National Football League': 2, 'Major League Baseball': 2, 'Fox': 2, 'Fox News': 2, 'Fox Sports': 2, 'GAMCO Investors': 2, 'Sinclair Broadcast': 2, 'Fox into News': 2, 'Facebook': 4, 'Amazon.com': 2, 'Pivotal Research': 2, 'Major League Baseballs': 2, 'NFL': 2, 'Media Kitchen': 2, 'Abertis': 2, 'FT Confidential Research': 2, 'Garena Interactive': 2, 'Sea': 2, 'Farallon Capital Management': 2, 'Hillhouse Capital': 2, 'JG Summit': 2, 'Uni-President Enterprises': 2, 'Cathay Financial': 2, 'Alibaba': 2, 'Lazada': 2, 'Tokopedia': 2, 'JD.Com': 2, 'SeaTown': 2, 'Temasek': 2, 'Khazanah Nasional Bhd': 2, 'IFR': 2, 'PT Toba Bara Sejahtra': 2, 'Milans Bocconi University': 2, 'Northern League': 2, 'Silvio Berlusconis Forza Italia': 2, 'Northern Leagues': 2, 'Democratic Party': 2, 'Forza Italia': 2, 'ENI': 2, 'Ichiyoshi Asset Management': 2, 'FBI': 2, \"May 's Conservative\": 2, 'Labour': 2, 'development.Toshiba': 2, 'Broadcom': 2, 'Toshiba': 2, 'Western Digital': 2, 'said.Dentsu': 2, 'Apple': 2}\n"
]
}
],
@ -379,7 +331,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
@ -397,28 +349,49 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check round number\n",
"m"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"News article no. 4185:\n",
"News article no. 6380:\n",
"\n",
"HEADLINE:\n",
"4185 Gunman in California UPS shooting targeted co-workers for slayings\n",
"6380 Playtech revenue rises on strong gaming division performance\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"4185 By Steve Gorman - June 23 June 23 The UPS employee who shot three coworkers to death last week inside a United Parcel Service facility in San Francisco before killing himself appears to have singled out his victims deliberately, but a motive remains unknown, police said on Friday.Investigators have yet to examine the contents of computers, cell phones and a journal seized from the gunman's home in their search for clues to the June 14 attack, San Francisco Police Commander Greg McEachern said at a news conference.McEachern also revealed the murder weapon was a MasterPiece Arms \"assault-type pistol\" that he said was \"commonly known as a MAC-10,\" equipped with an extended 30-round magazine. He said such weapons are outlawed in California.That gun and a second, semiautomatic pistol recovered from the scene were both listed as stolen weapons - the MAC-10 from Utah and the other handgun in California, McEachern said.Police offered few new details about how the shooting itself unfolded.The gunman, Jimmy Lam, 38, was attending a morning briefing with fellow employees at the UPS package-sorting and delivery center in San Francisco when he pulled out a gun and \"without warning or saying anything\" opened fire on four co-workers, the police commander said.The first two victims, identified as Wayne Chan, 56, and Benson Louie, 50, were killed.In the ensuing pandemonium, Lam walked calmly outside the building, approached another co-worker, Michael Lefiti, 46, and shot him dead without uttering a word, then reentered the facility.Moments later, as police closed in, Lam put a gun to his head and pulled the trigger, McEachern said, adding that Lam fired about 20 rounds in all before the bloodshed ended. Police never fired a shot.While no motive has been established, McEachern said interviews of various witnesses have led investigators to believe that the three slayings were \"purposeful and targeted,\" based on actions observed that day.He said surveillance video also showed that during the rampage, Lam appeared to pass by other co-workers \"without there being any interactions,\" suggesting those he did shoot were intentionally singled out. It was less clear whether the two surviving gunshot victims were deliberately targeted, he said.News of the carnage in San Francisco was largely overshadowed that day by an unrelated shooting hours earlier in the Virginia suburbs of Washington that left a congressman and several others wounded before police killed the assailant. (Reporting by Steve Gorman in Los Angeles; Editing by Bill Rigby)\n",
"6380 27 AM / 32 minutes ago Playtech revenue rises on strong gaming division performance Reuters Staff 1 Min Read (Reuters) - Gambling technology company Playtech ( PTEC.L ) reported half-year revenue up nearly 25 percent on a strong performance by its flagship Casino offering and benefits from recent acquisitions. Playtech, which provides software for sports betting and online casino and poker games, said that gaming revenue rose 22.8 percent to 376.5 million euros (347.43 million pounds). Total revenue for the six months to June 30 was 421.6 million euros, up from 337.7 million euros in the same period last year. Reporting by Rahul B in Bengaluru; Editing by David Goodman 0 : 0\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "286c21f192774952bd60d051cf197a8a",
"model_id": "f465cbe73b2a4d4da5345eb048f314e2",
"version_major": 2,
"version_minor": 0
},
@ -453,21 +426,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 5874:\n",
"News article no. 2614:\n",
"\n",
"HEADLINE:\n",
"5874 Insurer Aviva first-half operating profit up 11 percent to 1.47 billion\n",
"2614 Airbus reaches 35 A320neo deliveries for 2017 - sources\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"5874 August 3, 2017 / 6:34 AM / an hour ago Insurer Aviva first-half operating profit up 11 percent to 1.47 billion Reuters Staff 2 Min Read FILE PHOTO: Pedestrians walk past an Aviva logo outside the company's head office in the city of London, Britain March 5, 2009. Stephen Hird/File Photo LONDON (Reuters) - British insurer Aviva ( AV.L ) posted an 11 percent rise in operating profit in the first half of 2017 to 1.47 billion pounds ($1.94 billion), it said on Thursday, boosted by strong performances in its general insurance and fund management units. Analysts in a company-supplied poll had forecast an operating profit of 1.45 billion pounds. The company has been selling businesses it considers underperforming, including most recently Asia and Middle East-focused Friends Provident International and three Spanish joint ventures. \"Aviva is getting leaner and stronger and we are confident in our ability to sustain growth in the coming years,\" chief executive Mark Wilson said. Aviva Investors' operating profit rose 45 percent to 71 million pounds and the firm's general insurance business saw a 25 percent rise in operating profit to 417 million. Aviva's life business' operating profit rose 8 percent to 1.3 billion pounds. \"Aviva is transforming its 'no growth' businesses to 'organic growth' businesses,\" said analysts at JP Morgan, reiterating their overweight rating on the stock. Aviva also announced a 10-year extension of its UK general insurance distribution agreement with HSBC ( HSBA.L ), which it said was one of the largest ever in UK insurance. Combined operating ratio for the firm's general insurance business strengthened to 94.5 percent from 95.7 percent, where a level below 100 percent indicates an underwriting profit. The company said it would pay an interim dividend of 8.4 pence per share, up 13 percent and compared with a forecast 8.28 pence. Reporting by Carolyn Cohn; Editing by Rachel Armstrong 0 : 0 \n",
"2614 Business News 4:12pm BST Airbus reaches 35 A320neo deliveries for 2017 - sources The logo of Airbus Group is seen on the company's headquarters building in Toulouse, Southwestern France, April 18, 2017. REUTERS/Regis Duvignau PARIS Airbus ( AIR.PA ) has delivered 35 A320neo aircraft so far this year, industry sources said on Tuesday, bringing to 103 the number of upgraded medium-haul jets placed in service since deliveries began in January last year. The widely watched deliveries, which as of Monday totalled 9 so far in April, include the first aircraft for Icelandic budget carrier WOW air, which said on Tuesday it had taken the jet, powered by new LEAP engines from CFM International, under a leasing deal with Air Lease Corp ( AL.N ). Airbus aims to deliver some 200 of the A320neo jets, the latest version of Airbus's best-selling jet programme, this year. It is equipped with new fuel-saving engines from either CFM, jointly owned by General Electric ( GE.N ) and France's Safran ( SAF.PA ), or U.S. rival Pratt & Whitney. But deliveries have been hampered partly by problems with Pratt & Whitney's new Geared Turbofan engines. Since A320neo deliveries began in 2016, Airbus has delivered 53 aircraft with Pratt & Whitney engines and 50 powered by CFM. Pratt & Whitney parent United Technologies ( UTX.N ) on Tuesday reaffirmed plans to deliver 350 to 400 Geared Turbofan engines to planemakers this year. CFM's shareholders have said they are trimming forecasts for LEAP engine deliveries to Airbus and other planemakers in 2017 to 450-500 units from 500. Airbus is expected to give an update on its own deliveries to airlines with quarterly earnings on Thursday. (Reporting by Tim Hepher; editing by Alexander Smith)\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8b626884311344a8b8b33ae57a46f9ef",
"model_id": "ed29645e1df943bab4dc82bef62dfbc3",
"version_major": 2,
"version_minor": 0
},
@ -502,21 +475,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 8684:\n",
"News article no. 1178:\n",
"\n",
"HEADLINE:\n",
"8684 UPDATE 1-Canadian Pacific eyeing signs of life in crude by rail shipments\n",
"1178 Braskem sees Brazil plastics market growing 2 pct in 2017 -CEO\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"8684 47 PM / Updated 14 minutes ago UPDATE 1-Canadian Pacific eyeing signs of life in crude by rail shipments Reuters Staff 3 Min Read (Adds context, background on pipeline projects) MONTREAL/CALGARY, Alberta, Nov 14 (Reuters) - Canadian Pacific Railway Ltd sees shipments of crude by rail coming alive a little bit, Chief Marketing Officer John Brooks said on Tuesday, signaling a pickup in a business that had been hurt by low energy prices and competition from pipelines. Many traders are expecting a pickup in crude by rail volumes in 2018 as oil sands projects including Suncor Energy Incs Fort Hills plant and the latest phase of Canadian Natural Resources Ltds Horizon oil sands start producing at the end of this year. Canadian railway executives, however, remain cautious about crude-by-rail demand after they were forced to slash rates for shipping crude in 2015 due to a rout in global oil prices. The energy sector is really getting interesting, Brooks told a Toronto transportation conference, noting demand for shipping several energy-related products including frac sand, which is used in the hydraulic fracturing process. CP, Canadas second-largest railroad, in October reported a better-than-expected quarterly profit on higher shipments of crude oil, coal and potash. Energy industry players are bracing for congestion on Canadas major export pipelines, which are running close to capacity, while underutilized rail loading terminals built during a crude-by-rail boom in 2014 are increasing loading volumes. TransCanada Corps in October scrapped its $12 billion Energy East pipeline that would have taken crude from Alberta to the Atlantic coast, which could further increase producers reliance on crude-by-rail. Calgary-based Gibson Energy said on a third-quarter earnings call that it has started to see its Hardisty rail terminal in central Alberta being used more than in the past. And Cenovus Energy Inc, which owns the Bruderheim terminal near Edmonton, Alberta, said earlier this month that it has additional capacity to meet increased demand as it arises. With new production expected to come on line in the next year we are about to reach the limits of current pipeline infrastructure. This will likely result in a need to turn to rail as a stopgap to allow the new crude production to reach refineries, analysts from consultancy Turner Mason & Company said on Tuesday in a client note. The most recent National Energy Board data showed Canada exported 93,000 barrels per day (bpd) by rail in July, down 40 percent from a 2017 high of 156,000 bpd in March. However, since the summer the price discount on Canadian crude in Alberta versus its global benchmark has widened and is expected to deepen in coming months. With the wider differential rail shipments become more economic, even though they are still costlier than moving crude by pipelines. (Reporting By Allison Lampert in Montreal and Nia Williams in Calgary; Editing by Meredith Mazzilli)\n",
"1178 Company News 13am EST Braskem sees Brazil plastics market growing 2 pct in 2017 -CEO SAO PAULO Feb 22 Petrochemical producer Braskem SA expects demand for plastic resins to grow around 2 percent this year from 2016, Chief Executive Fernando Musa said on a Wednesday earnings call. Demand for polyethylene, polypropylene and PVC in Brazil rose 13 percent in the fourth quarter from a year ago, Braskem said in an unaudited earnings release on Wednesday. (Reporting by Brad Haynes) Next In Company News\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fb2f45b1947f4cd0a9fc57e33e188348",
"model_id": "33fcac8629a24518964db9295a29e4e3",
"version_major": 2,
"version_minor": 0
},
@ -551,21 +524,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 475:\n",
"News article no. 2273:\n",
"\n",
"HEADLINE:\n",
"475 Airlines Lufthansa and Etihad 'in merger talks' - newspaper\n",
"2273 Effissimo says Toshiba stake purchase aimed at longer term price gain\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"475 MILAN Germany's Lufthansa and Etihad Airways are in talks to possibly merge the two airlines, Italian newspaper Il Messaggero said in an unsourced report on Tuesday, boosting the German airline's share price.According to the paper, managers from both companies have for weeks been examining the possibility of Etihad buying a 30-40 percent stake in Lufthansa through a sale of new shares to the Abu Dhabi state-owned airline.In a second step, the two airlines would look at a full-blown merger, the paper said, adding that the parties would meet shortly to speed up the talks.Any combination between the two would have an impact on loss-making Italian airline Alitalia, which is 49 percent-owned by Etihad and is in the midst of a major restructuring that will likely include job cuts and grounding of planes.Lufthansa and Etihad declined to comment on what they described as \"speculation\".Lufthansa shares were up 6 percent on Tuesday, topping the DAX index of largest German companies.Lufthansa and Etihad last month signed a flight code-sharing deal after Lufthansa agreed to lease 38 crewed planes from Air Berlin, which is part-owned by Etihad.Analysts reacted with scepticism to the report, citing the foreign ownership rules governing international traffic rights, and questioning what the benefits for Lufthansa would be.In Europe an airline must by majority-owned by EU investors in order to maintain its traffic rights under international air service agreements.Lufthansa is currently almost 69 percent owned by German investors but 13 percent is in the hands of U.S. investors and a further 9 percent is owned by other nationalities.In addition, if Etihad wished to buy more than 30 percent of Lufthansa, it would have to make an offer for the company as a whole according to German takeover rules.Etihad's local rival Qatar Airways has built up a 20 percent stake in British Airways-owner IAG by purchasing shares on the open market. That has boosted links between Europe and the Asia-Pacific region. However, Credit Suisse said Lufthansa already had joint ventures with Singapore Airlines, Air China and All Nippon Airways covering the region.Greater cooperation with Lufthansa could help Etihad, especially given the growth of Qatar Airways, CAPA-Centre for Aviation senior analyst Will Horton said.\"The rapid growth of Qatar Airways and its future expansion will make it harder and costlier for Etihad to stay relevant on its own - everything else aside,\" he said in an emailed comment.There have previously been media reports that Italian shareholders in Alitalia are keen for Lufthansa to invest in the Italian carrier, along with speculation that Lufthansa could take on more of Air Berlin. However, Lufthansa executives have repeatedly said in recent weeks that they have their hands full integrating the Air Berlin planes into its operations as well as taking over Brussels Airlines.\"A Lufthansa/Etihad pseudo-merger, which is what is being suggested in the press today, presumably encompassing the whole of Alitalia and Air Berlin, looks rather implausible,\" Barclays analysts said in a note.(Reporting by Agnieszka Flak in Milan, Victoria Bryan in Berlin and Alexander Cornwell in Dubai; Editing by Greg Mahlich)\n",
"2273 TOKYO Singapore-based fund Effissimo said on Friday it had bought its 8.14 percent stake in Toshiba Corp ( 6502.T ) because it expects its share price to gain and produce returns though a longer-term increase in corporate value.Effissimo, established by former colleagues of Japan's most famous activist investor, Yoshiaki Murakami, has become the largest shareholder in Toshiba with its stake, a regulatory filing showed on Thursday.Effissimo's purchase of Toshiba shares is worth about 65 billion yen ($584 million), based on its closing price on March 15, the date of ownership shown in the filing.(Reporting by Makiko Yamazaki; Editing by Edwina Gibbs)\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1d5d56ee6f9a4f91bdeee367ddedbbc5",
"model_id": "69b7db6b4c534e3e93aee26fa05f6d29",
"version_major": 2,
"version_minor": 0
},
@ -600,21 +573,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 7628:\n",
"News article no. 7288:\n",
"\n",
"HEADLINE:\n",
"7628 EMERGING MARKETS-Emerging FX feel dollar pinch, Turkish assets rattled\n",
"7288 Drax promotes finance head Will Gardiner as next CEO\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"7628 15 AM / Updated 21 minutes ago EMERGING MARKETS-Emerging FX feel dollar pinch, Turkish assets rattled Karin Strohecker 5 Min Read LONDON, Oct 23 (Reuters) - A stronger dollar increased pressure on some emerging currencies on Monday with the Turkish lira and stocks suffering as the latest concerns over Ankaras relationship with Washington compounded the weaker global backdrop. The dollar sailed to the highest level in more than two weeks, still enjoying a boost from U.S. President Donald Trump and Republicans clearing a hurdle on tax reforms last week and speculation over who will take over at the helm of the Federal Reserve. We are seeing increasing pressure on emerging market currencies and that is likely to continue over the near term as we still have a lot of speculation regarding who will succeed Janet Yellen at the Fed, said Phoenix Kalen at Societe Generale. That is weighing on investors minds, alongside the strength of the dollar thats coming from expectations of fiscal and tax reform. The Chinese yuan fell against the U.S. dollar after a weaker midpoint fixing while Mexicos peso weakened 0.2 percent. But Turkeys lira and South Africas rand - both seen as vulnerable to U.S. interest rate rises due to current account deficits - were the hardest hit, weakening for a second straight session. Losses in the lira of more than 1 percent came after Turkeys banking regulator urged the public on Saturday to ignore rumours about financial institutions in an apparent dismissal of a report that some banks face billions of dollars of U.S. fines over alleged violations of Iran sanctions. Given the level of tensions with the U.S., the market is still sceptical about this denial, said Inan Demir at Nomura. The numbers mentioned are large...the largest fine mentioned was $5 billion and that would be a very large fine in comparison to any banks equity in Turkey. Relations between NATO allies Washington and Ankara have been strained by a series of diplomatic rows. Meanwhile U.S. authorities have hit global banks with billions of dollars in fines over violations of sanctions with Iran and other countries in recent years. Adding to the woes was data on consumer confidence, which showed an increasingly pessimistic outlook. Turkish stocks also took a tumble, slipping 0.8 percent while MSCIs emerging market benchmark was flat on the day. Meanwhile in Argentina, candidates allied with President Mauricio Macri enjoyed sweeping victories in Sundays mid-term election, strengthening his position in Congress while dimming prospects for a political comeback by his predecessor Cristina Fernandez. Investors have said they want to see Macri push through labour and tax reforms aimed at lowering business costs in Latin Americas third-biggest economy. But they have been worried about a political resurgence by Fernandez, loved by millions of low-income Argentines helped by generous social spending during her administrations. For GRAPHIC on emerging market FX performance 2017, see tmsnrt.rs/2e7eoml For GRAPHIC on MSCI emerging index performance 2017, see tmsnrt.rs/2dZbdP5 For CENTRAL EUROPE market report, see For TURKISH market report, see For RUSSIAN market report, see) Emerging Markets Prices from Reuters Equities Latest Net Chg % Chg % Chg on year Morgan Stanley Emrg Mkt Indx 1118.54 -1.15 -0.10 +29.72 Czech Rep 1056.24 -0.37 -0.04 +14.61 Poland 2484.09 +18.58 +0.75 +27.53 Hungary 0.00 +0.00 +0.00 -100.00 Romania 7919.00 -14.48 -0.18 +11.77 Greece 743.20 -6.03 -0.80 +15.47 Russia 1130.49 -3.96 -0.35 -1.90 South Africa 51807.55 +206.89 +0.40 +18.01 Turkey 07700.54 -788.15 -0.73 +37.83 China 3382.27 +3.62 +0.11 +8.98 India 32447.30 +57.34 +0.18 +21.86 Currencies Latest Prev Local Local close currency currency\n",
"7288 6:33 AM / Updated 17 minutes ago Drax chief executive Dorothy Thompson to step down Reuters Staff 1 Min Read (Reuters) - British power producer Drax ( DRX.L ) said chief executive Dorothy Thompson will step down after 12 years in the role. The companys current finance chief Will Gardiner will succeed Dorothy Thompson who will step down from the post and leave the group at the end of 2017. Shares of the company, which owns the UKs largest power station, fell 1.5 percent in early trading. Drax is speeding up plans to convert its units to gas. Under pressure from government plans to close all coal plants by 2025, Drax has increasingly turned to burning compressed wood pellets, or biomass. The company will begin the process of appointing a new chief financial officer and will also review the option of making an appointment on an interim basis, it said on Thursday. Reporting by Rahul B and Radhika Rukmangadhan in Bengaluru; Editing by Sunil Nair\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e1cbc9e465945d2a36b3a40b738f023",
"model_id": "115d041beea5439db33aec72724ed97c",
"version_major": 2,
"version_minor": 0
},
@ -649,21 +622,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 4080:\n",
"News article no. 2075:\n",
"\n",
"HEADLINE:\n",
"4080 Japan passes law to tighten regulations on high-frequency trading\n",
"2075 PetroChina 2016 profit sinks 78 percent on lower crude prices\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"4080 Business 10:13am BST Japan passes law to tighten regulations on high-frequency trading TOKYO Japan tightened regulations on high-frequency trading (HFT) this week, passing into law measures that will require HFT firms to register with regulators. Other nations in Europe and elsewhere in Asia are looking to tighten the leash on high-frequency traders who programme ultra-fast computers to trade in milliseconds without human intervention. Some major U.S. exchanges want to introduce speed limits on trading. The growing presence of HFT on the Tokyo Stock Exchange (TSE) has raised concerns high-speed trading could destabilise markets and leave retail investors at a disadvantage. The law was passed by parliament on Wednesday and the new regulations could come into force as early as 2018. Japan's market regulator, the Financial Services Agency (FSA), has said previously it wanted HFT participants to register and to ensure proper risk management measures were in place. \"The definition has not yet been created. We can guess at who might be affected, but we don't know for sure the full scope of who will be affected,\" said Seth Friedman, chief executive of advisory firm Shiroyama Consulting Co.. The new rules stipulate that a company engaging in HFT will have to establish an office in Japan or be represented in the country by an agent. HFT accounted for about 70 percent of orders on the Tokyo Stock Exchange in 2016, FSA estimates show. High-speed trading accounted for slightly less than half of actual traded value, according to market participants, taking into account order cancellations. That would amount to slightly less than 321 trillion yen ($2.9 trillion) based on figures on the TSE website for total trade in cash equity of 643 trillion yen. (Reporting by Lisa Twaronite; Editing by)\n",
"2075 45am BST PetroChina 2016 profit sinks 78 percent on lower crude prices FILE PHOTO: PetroChina's petrol station is pictured in Beijing, China, March 21, 2016. REUTERS/Kim Kyung-Hoon/File Photo BEIJING China's largest oil and gas producer, PetroChina ( 601857.SS ), on Thursday reported a drop of 78 percent in 2016 annual net profit, to its lowest since at least 2011, as it was hit by lower prices for crude oil and natural gas. The shrinking profits posted by China's state oil and gas producers for last year have highlighted their growing challenges from falling output at ageing wells and excess supply in domestic fuel oil markets. PetroChina's net profit sank to 7.86 billion yuan ($1.14 billion) from 35.7 billion yuan in 2015, while revenue fell 6.3 percent to 1.62 trillion yuan ($235 billion), based on IFRS accounting standards. PetroChina's crude oil production fell 5.3 percent to 920.7 million barrels in 2016 - still the highest among global oil producers including BP ( BP.L ) and Shell ( RDSa.L ) - but marking the lowest for PetroChina since 2012, according to Reuters data. The state company's crude oil output peaked in 2015 at 972 million barrels. PetroChina's total oil and gas output for the year was 1.47 billion barrels of oil equivalent, down 1.8 percent from 2015. PetroChina had 7.44 billion barrels of proven crude oil reserves, down 12.7 percent from 2015, it said. In its annual report, the company said domestic gasoline demand was lower than expected, while diesel consumption fell. \"The situation of excessive supply in domestic refined products became severe\" last year, it said. \"The quantity of imported and processed crude oil, operating capacity, and market shares of local refineries (all) increased significantly, leading to fiercer market competition.\" PetroChina's smaller upstream competitor CNOOC ( 0883.HK ) - a specialist in offshore operations - earlier reported its worst result since 2011, but forecast its output to rise this year. Profits at Sinopec ( 600028.SS ) - Asia's largest refiner - rose 44 percent from a year earlier on the back of strong performances in refining and chemicals. Sinopec's oil and gas production in 2016, however, fell 8.6 percent to 431.29 million barrels of oil equivalent versus 471.91 million a year earlier. ($1 = 6.8895 Chinese yuan) (Reporting by Josephine Mason and Meng Meng; Editing by Tom Hogue) Next In Business News\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "81ba9eef3ac049f58b57a9300e88a20c",
"model_id": "ed4f02b48baa45af89069cb5c0d1fc3f",
"version_major": 2,
"version_minor": 0
},
@ -698,21 +671,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 849:\n",
"News article no. 2166:\n",
"\n",
"HEADLINE:\n",
"849 BRIEF-Snap Inc's initial valuation at $19.5 bln to $22.2 bln- CNBC, citing DJ\n",
"2166 BRIEF-Village Farms announces year end 2016 results\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"849 Company News - Thu Feb 16, 2017 - 12:11am EST BRIEF-Snap Inc's initial valuation at $19.5 bln to $22.2 bln- CNBC, citing DJ Feb 16 (Reuters) - * Snap Inc sets initial valuation at $19.5 billion to $22.2 billion, or $14 to $16 per share, near low end of its targeted range - CNBC, citing Dow Jones Next In Company News Morning News Call - India, February 16 To access the newsletter, click on the link: http://share.thomsonreuters.com/assets/newsletters/Indiamorning/MNC_IN_02162017.pdf If you would like to receive this newsletter via email, please register at: https://forms.thomsonreuters.com/india-morning/ FACTORS TO WATCH 10:00 am: Junior Finance Minister Arjun Ram Meghwal at CII event in New Delhi. LIVECHAT: COMMODITIES OUTLOOK Oil markets remain under pressure as crude supplies remain bloated despite th MORE FROM REUTERS From Around the Web Promoted by Revcontent Trending Stories\n",
"2166 16am EDT BRIEF-Village Farms announces year end 2016 results March 31 Village Farms International Inc * Village Farms announces year end 2016 results * Village Farms International - sales for 3 months ended Dec 31, 2016 increased by $2,187, or 6%, to $37,308 from $35,121 for 3 months ended Dec 31, 2015 * Village Farms International Inc - net income for 3 months ended Dec 31, 2016 decreased by $2,033 to $453 from $2,486 for 3 months ended Dec 31, 2015 Source text for Eikon: \n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dc76006231a741838d98f30b56208934",
"model_id": "80563e14fa554ca082f8e656e768ad3b",
"version_major": 2,
"version_minor": 0
},
@ -747,21 +720,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 2569:\n",
"News article no. 29:\n",
"\n",
"HEADLINE:\n",
"2569 BRIEF-Supremex announces appointment of Bertrand Jolicoeur as CFO\n",
"29 ECB has told several banks to submit plans on bad loan by end-Feb - source\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"2569 23am EDT BRIEF-Supremex announces appointment of Bertrand Jolicoeur as CFO April 20 Supremex Inc * Supremex announces appointment of Chief Financial Officer and strengthens executive team * Says announced appointment of Bertrand Jolicoeur as Chief Financial Officer * Says Lyne Bgin, interim vice-president of finance, will return to her role as corporate controller Source text for Eikon: \n",
"29 Business News - Mon Jan 30, 2017 - 1:08pm GMT ECB has told several banks to submit plans on bad loan by end-Feb - source The European Central Bank (ECB) headquarters is pictured in Frankfurt, Germany, December 8, 2016. REUTERS/Ralph Orlowski MILAN The European Central Bank has asked several banks to submit a plan by the end of February spelling out how they intend to reduce their problematic loans, a source familiar with the matter said on Monday. The source, who declined to name the banks involved, said the request was a follow-up to the ECB's new guidance on non-performing loans issued last year. Italy's biggest bank by assets UniCredit ( CRDI.MI ) earlier on Monday said it had been requested to present such a plan by Feb. 28. Genoa-based Banca Carige ( CRGI.MI ) must also submit its own plan by that deadline. Italian banks are saddled with 356 billion euros ($378 billion) of soured debts, a third of the euro zone's total, accumulated during a long recession. (Reporting by Silvia Aloisi, editing by Luca Trogni) Next In Business News Oil steady but U.S. drilling weakens deal to cut output LONDON Oil prices were steady on Monday, but news of another increase in U.S. drilling activity spread concern over rising output just as many of the world's oil producers are trying to comply with a deal to pump less in an attempt to prop up prices.\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "111da7a4a7244fe1849502554ffc20c7",
"model_id": "1a821243ed1548409bdd2d8736094218",
"version_major": 2,
"version_minor": 0
},
@ -796,21 +769,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 1854:\n",
"News article no. 87:\n",
"\n",
"HEADLINE:\n",
"1854 BRIEF-Korea Aerospace Industries selects Triumph for kf-x airframe\n",
"87 Investment Focus: History suggests Trump month will be stocks down, dollar up\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"1854 56pm EST BRIEF-Korea Aerospace Industries selects Triumph for kf-x airframe March 1 Triumph Group Inc * Triumph awarded contract with korea Aerospace Industries for kf-x airframe mounted accessory drive * Selected by Korea Aerospace Industries, ltd to provide airframe mounted accessory drives (amad) on new kf-x fighter aircraft Source text for Eikon: \n",
"87 Business News 9:18am EST Investment Focus: History suggests Trump month will be stocks down, dollar up FILE PHOTO - Republican U.S. presidential candidate Donald Trump poses for a photo after an interview with Reuters in his office in Trump Tower, in the Manhattan borough of New York City, U.S., May 17, 2016. REUTERS/Lucas Jackson/File Photo By Jamie McGeever and Marc Jones - LONDON LONDON For financial markets, the Trump era begins on Monday, and if history is any guide the following month should be a rocky one for Wall Street but positive for the dollar. The S&P 500 .SPX has fallen a median 2.7 percent in the month after each new president has taken the keys to the White House since Herbert Hoover did so in January 1929, according to Reuters analysis. Only four presidents have seen Wall Street rise in their first month in power: Hoover (+3.8 percent), John F. Kennedy in 1961 (+6 pct), George H. W. Bush in 1989 (+5.3 pct) and Bill Clinton in 1993 (0.8 pct). The market has fallen in the first month under every other incoming president since Hoover. Even Ronald Reagan and Barack Obama, who ultimately presided over 120 percent and 165 percent rallies on Wall Street during their two terms, respectively, saw initial slides of 4.8 percent and 15 percent. The dollar tends to fare better. Analysis going back to the early 1970s when the currency was taken off the gold standard shows it has risen an average 2.2 percent in the first month of a first-time president. Donald Trump takes office as the 45th president of the United States with investor apprehension over an incoming president has rarely been higher. \"There are two sides to Trump, the one side focusing on U.S. stimulus which drives up global growth and the other side, the protectionist Donald Trump that could do the opposite. So the big question is which will we get?,\" said State Street Global Advisors' EMEA head of currencies James Binny. Markets latched on after Trump won the November election to his reflationary and pro-growth stance: stocks rose to new highs, the bond selloff deepened, and the dollar clocked a 14-year peak against the euro. But as the inauguration has drawn closer, that momentum has faded. This week, the Dow Jones .DJI and dollar .DXY hit six-week lows, the 10-year U.S. Treasury yield its lowest since late November US10YT=RR, and gold rose to its highest in two months XAU=. Some investors are playing safe. \"We are neutral, because we don't know exactly what direction Trump will take,\" said Lukas Daadler, chief investment officer of investment solutions at Robeco, a subsidiary of Robeco Group. The latter has 269 billion euros in assets under management. \"There is some extreme positioning out there, so there's the risk of a short squeeze. But we've taken a neutral stance, and we might see more detail on his plans next week.\" Much of that positioning is in the U.S. bond market and the dollar. Speculators have amassed record bets against 10-year Treasuries, and according to Bank of America Merrill Lynch's January fund manager survey, the most overcrowded trade in the world now is the pro-dollar trade. BAML strategists said on Friday that although there has been a clear cooling of \"Trump trade\" bets in recent weeks, overall investor sentiment is its highest in three months. They recommend sticking with they call the \"Icarus trade\" - one last 10 percent rise in stocks and commodities before the rally ends. For graphic on markets one month into presidency: reut.rs/2k8p0Ui The Presidential Touch: tmsnrt.rs/2j1OyVe (Graphic by Vikram Subhedar; Editing by Jeremy Gaunt) Next In Business News\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "62e65ecb60f04c93be7b9d493c90db9c",
"model_id": "34dd05dfd3ff4286b08b7a78af09ec81",
"version_major": 2,
"version_minor": 0
},
@ -845,21 +818,21 @@
"___________________________________________________________________________________________________\n",
"\n",
"\n",
"News article no. 6091:\n",
"News article no. 3431:\n",
"\n",
"HEADLINE:\n",
"6091 BAT changes regional management structure after Reynolds deal\n",
"3431 Japan proposes expanding bilateral FX swap scheme with ASEAN\n",
"Name: Title, dtype: object\n",
"\n",
"TEXT:\n",
"6091 August 31, 2017 / 6:46 AM / 2 hours ago BAT restructures to help e-cigarettes go mainstream Justin George Varghese and Martinne Geller 4 Min Read Attendees try British American Tobacco's new tobacco heating system device 'glo' after a news conference in Tokyo, Japan, November 8, 2016. Kim Kyung-Hoon (Reuters) - British American Tobacco ( BATS.L ) has reorganized its regional management structure to integrate its vaping products with its core business, in a push by the worlds biggest listed tobacco company to help cigarette alternatives go mainstream. The move, announced on Thursday, follows the companys $49 billion (38 billion pounds) takeover of U.S. peer Reynolds American, which added Camel cigarettes and Vuse e-cigarettes to a BAT portfolio that includes Lucky Strike cigarettes, Vype e-cigarettes and the glo tobacco-heating device. \"Now that we have built a successful NGP (next generation products) business which is poised for substantial growth, we will be fully integrating NGP to leverage the scale and expertise of the whole group to drive growth in an area that is fast becoming a key part of our mainstream business,\" BAT said in a statement. BAT wants to double the number of countries where it sells vaping products this year and again in 2018, as it jostles for position in a growing market against rivals Philip Morris International ( PM.N ) and Imperial Brands ( IMB.L ). BAT and Philip Morris were the first of the big tobacco firms to invest in cigarette alternatives a few year back, as growing health consciousness reduces traditional smoking. Philip Morris, maker of Marlboro cigarettes, is ahead of BAT in the market for tobacco-based vaping devices, which some analysts believe will be more popular than traditional e-cigarettes with regular smokers, and its shares have been at a bigger premium to its peers. ( bit.ly/2xOLU9R ) Last month, the U.S. Food and Drug Administration (FDA) proposed cutting nicotine in cigarettes to \"non-addictive\" levels in a push to move smokers towards potentially less harmful e-cigarettes. Under the management reorganization announced on Thursday BAT appointed Asia-Pacific Director Jack Bowles to the newly created role of chief operating officer for the international business, excluding the United States. Shares were up around 1.5 percent at 1322 GMT on Thursday. Jefferies analyst Owen Bennett said the changes could add some uncertainty for BAT in the near term, but in the longer term it reinforced the importance of cigarette alternatives to tobacco companies, which face slowing sales globally. \"Whereas those companies that were better positioned for emerging market growth in the past were favoured, the key differentiator now is likely to be who is positioned best in emerging products, given the recent slowdown in emerging market cigarettes,\" the analyst said. Japan Tobacco said last week it would buy the Philippines' No. 2 cigarette maker Mighty Corp for about $936 million, its second large deal in Southeast Asia this month, as it deepens its push into emerging markets. British American Tobacco vs Philip Morris (YTD) bit.ly/2xOLU9R Reporting By Justin George Varghese in Bengaluru and Martinne Geller in London; Editing by Greg Mahlich and Susan Thomas\n",
"3431 Economy 3:25am BST Japan proposes expanding bilateral FX swap scheme with ASEAN Light is cast on a Japanese 10,000 yen note as it's reflected in a plastic board in Tokyo, in this February 28, 2013 picture illustration. REUTERS/Shohei Miyano/Illustration/File Photo YOKOHAMA, Japan Japan's Ministry of Finance on Friday proposed launching bilateral foreign exchange swap arrangements of up to 40 billion dollars with Southeast Asian nations to enable Tokyo to provide yen funds to these countries during times of financial crisis. The proposal was made during a meeting between finance ministers and central bank governors from Japan and the members of Association of Southeast Asian Nations (ASEAN) in Japan, the ministry said in a statement. The move is aimed at making yen funds more accessible to Japanese firms increasing their presence in Southeast Asia as well as others in need of the Japanese currency in case of financial stress. The scheme would allow each country to choose either the yen or the dollar in receiving funds under the bilateral arrangements in response to liquidity crisis. Separately on Friday, Japan entered into bilateral currency swap arrangement worth 3 billion dollars with Thailand and agreed to enter a similar arrangement with Malaysia under a current swap framework. (Reporting by Tetsushi Kajimoto; Editing by Sam Holmes)\n",
"Name: Text, dtype: object\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a3c29fd05f7247e0b125aa867601a89b",
"model_id": "7c13276edecd491383ea9df15750c974",
"version_major": 2,
"version_minor": 0
},
@ -904,41 +877,13 @@
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# create button widget for confirming labels\n",
"button_confirm = widgets.Button(description='Confirm Labels',\n",
" disabled=False,\n",
" button_style='')\n",
"\n",
"def g(b):\n",
" ''' this function is executed when button_confirm clicked\n",
" ''' \n",
" # show new labels\n",
" print(df.loc[df['Index'].isin(label_next)]['Label'])\n",
"\n",
"# execute function g if button is clicked\n",
"button_confirm.on_click(g)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"PLEASE CLICK THE BUTTON ('Confirm Labels') BELOW TO CONFIRM YOUR LABELS."
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7de4c71a9825428bae7cbe4b6874a1b6",
"model_id": "5c1ac5ca6b1248d0a7e81fcd52cb4523",
"version_major": 2,
"version_minor": 0
},
@ -968,27 +913,80 @@
"name": "stdout",
"output_type": "stream",
"text": [
"475 2.0\n",
"849 0.0\n",
"1854 0.0\n",
"2569 0.0\n",
"4080 0.0\n",
"4185 0.0\n",
"5874 0.0\n",
"6091 5.0\n",
"7628 0.0\n",
"8684 0.0\n",
"29 0.0\n",
"87 0.0\n",
"1178 0.0\n",
"2075 0.0\n",
"2166 0.0\n",
"2273 4.0\n",
"2614 0.0\n",
"3431 0.0\n",
"6380 0.0\n",
"7288 0.0\n",
"Name: Label, dtype: float64\n",
"29 0.0\n",
"87 0.0\n",
"1178 0.0\n",
"2075 0.0\n",
"2166 0.0\n",
"2273 4.0\n",
"2614 0.0\n",
"3431 0.0\n",
"6380 0.0\n",
"7288 0.0\n",
"Name: Label, dtype: float64\n"
]
}
],
"source": [
"# create button widget for confirming labels\n",
"button_confirm = widgets.Button(description='Confirm Labels',\n",
" disabled=False,\n",
" button_style='')\n",
"\n",
"def g(b):\n",
" ''' this function is executed when button_confirm clicked\n",
" ''' \n",
" # show new labels\n",
" print(df.loc[df['Index'].isin(label_next)]['Label'])\n",
"\n",
"# execute function g if button is clicked\n",
"button_confirm.on_click(g)\n",
"\n",
"display(button_confirm)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"PLEASE CLICK THE BUTTON ABOVE ('Confirm Labels') TO CONFIRM YOUR LABELS."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 79,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This round (no. 3):\n",
"Number of labeled articles: 30\n",
"Number of unlabeled articles: 9970\n"
]
}
],
"source": [
"print('This round (no. {}):'.format(m))\n",
"print('Number of labeled articles: {}'.format(len(l_data)))\n",
"print('Number of unlabeled articles: {}'.format(len(u_data)))"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
@ -1003,40 +1001,18 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
" \n",
"# split data set into labeled and unlabeled samples\n",
" # split data set into labeled and unlabeled samples\n",
"l_data = df.loc[df['Label'] != -1]\n",
"u_data = df.loc[df['Label'] == -1]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This round (no. 1):\n",
"Number of labeled articles: 10\n",
"Number of unlabeled articles: 9990\n"
]
}
],
"source": [
"print('This round (no. {}):'.format(m))\n",
"print('Number of labeled articles: {}'.format(len(l_data)))\n",
"print('Number of unlabeled articles: {}'.format(len(u_data)))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1049,7 +1025,7 @@
"\n",
"# BOW: making vocabulary of data set...\n",
"\n",
"# BOW: vocabulary consists of 947 features.\n",
"# BOW: vocabulary consists of 1938 features.\n",
"\n",
"# MNB: fit training data and calculate matrix...\n",
"\n",
@ -1060,13 +1036,7 @@
"# MNB: transform testing data to matrix...\n",
"\n",
"# BOW: extracting all words from articles...\n",
"\n",
"# BOW: calculating matrix...\n",
"\n",
"# BOW: calculating frequencies...\n",
"\n",
"# MNB: ending multinomial naive bayes\n",
"Wall time: 21min 5s\n"
"\n"
]
}
],
@ -1077,7 +1047,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 56,
"metadata": {},
"outputs": [
{
@ -1096,7 +1066,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 57,
"metadata": {},
"outputs": [
{
@ -1105,16 +1075,16 @@
"text": [
"First 10 estimations:\n",
"\n",
"[[0.33335808 0.33330861 0.33333331]\n",
" [0.33335762 0.33331573 0.33332665]\n",
" [0.33336293 0.33332051 0.33331657]\n",
" [0.3333764 0.33330196 0.33332164]\n",
" [0.33338693 0.33331068 0.33330239]\n",
" [0.33336951 0.33332574 0.33330475]\n",
" [0.33341934 0.3333032 0.33327745]\n",
" [0.33349444 0.33324293 0.33326262]\n",
" [0.33337205 0.3333111 0.33331685]\n",
" [0.3333632 0.33331446 0.33332234]]\n"
"[[0.3333692 0.33330931 0.33332149]\n",
" [0.33336935 0.33330722 0.33332343]\n",
" [0.33338453 0.33329506 0.33332041]\n",
" [0.33338619 0.33328946 0.33332435]\n",
" [0.33341376 0.3332979 0.33328834]\n",
" [0.33337418 0.33331362 0.3333122 ]\n",
" [0.33345864 0.33328092 0.33326044]\n",
" [0.33359888 0.33318753 0.33321359]\n",
" [0.33339256 0.33329162 0.33331582]\n",
" [0.33342884 0.33327653 0.33329462]]\n"
]
}
],
@ -1126,7 +1096,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
@ -1156,14 +1126,14 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of auto-labeled samples in round 1: 0\n"
"Number of auto-labeled samples in round 2: 0\n"
]
}
],
@ -1173,7 +1143,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
@ -1185,15 +1155,8 @@
" ignore_index=True)\n",
"\n",
"# sort dataframe by index\n",
"df = df.sort_values(['Index'])"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"df = df.sort_values(['Index'])\n",
"\n",
"# create button widget for checking labels\n",
"button_check = widgets.Button(description='Check Label',\n",
" disabled=False,\n",
@ -1216,16 +1179,16 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"End of this round (no. 1):\n",
"Number of labeled articles: 10\n",
"Number of unlabeled articles: 9990\n"
"End of this round (no. 2):\n",
"Number of labeled articles: 20\n",
"Number of unlabeled articles: 9980\n"
]
}
],
@ -1237,7 +1200,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [